diff --git a/jena-sparql-api-algebra/pom.xml b/jena-sparql-api-algebra/pom.xml
index 0df92567a..ff38844ff 100644
--- a/jena-sparql-api-algebra/pom.xml
+++ b/jena-sparql-api-algebra/pom.xml
@@ -12,7 +12,7 @@
 	<parent>
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
 	</parent>
diff --git a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformCopyAdapter.java b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformCopyAdapter.java
new file mode 100644
index 000000000..39a1e5903
--- /dev/null
+++ b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformCopyAdapter.java
@@ -0,0 +1,203 @@
+package org.aksw.jena_sparql_api.algebra.transform;
+
+import java.util.List;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+import org.apache.jena.sparql.algebra.Op;
+import org.apache.jena.sparql.algebra.TransformCopy;
+import org.apache.jena.sparql.algebra.op.Op1;
+import org.apache.jena.sparql.algebra.op.Op2;
+import org.apache.jena.sparql.algebra.op.OpAssign;
+import org.apache.jena.sparql.algebra.op.OpConditional;
+import org.apache.jena.sparql.algebra.op.OpDiff;
+import org.apache.jena.sparql.algebra.op.OpDisjunction;
+import org.apache.jena.sparql.algebra.op.OpDistinct;
+import org.apache.jena.sparql.algebra.op.OpExt;
+import org.apache.jena.sparql.algebra.op.OpExtend;
+import org.apache.jena.sparql.algebra.op.OpFilter;
+import org.apache.jena.sparql.algebra.op.OpGraph;
+import org.apache.jena.sparql.algebra.op.OpGroup;
+import org.apache.jena.sparql.algebra.op.OpJoin;
+import org.apache.jena.sparql.algebra.op.OpLabel;
+import org.apache.jena.sparql.algebra.op.OpLeftJoin;
+import org.apache.jena.sparql.algebra.op.OpList;
+import org.apache.jena.sparql.algebra.op.OpMinus;
+import org.apache.jena.sparql.algebra.op.OpN;
+import org.apache.jena.sparql.algebra.op.OpOrder;
+import org.apache.jena.sparql.algebra.op.OpProcedure;
+import org.apache.jena.sparql.algebra.op.OpProject;
+import org.apache.jena.sparql.algebra.op.OpPropFunc;
+import org.apache.jena.sparql.algebra.op.OpReduced;
+import org.apache.jena.sparql.algebra.op.OpSequence;
+import org.apache.jena.sparql.algebra.op.OpSlice;
+import org.apache.jena.sparql.algebra.op.OpTopN;
+import org.apache.jena.sparql.algebra.op.OpUnion;
+
+/**
+ * An implementation of {@link TransformCopy} that forwards all transform(...) calls
+ * to generic transformOpXXX methods with the appropriate signature.
+ *
+ * @author raven
+ *
+ */
+public class TransformCopyAdapter
+    extends TransformCopy
+{
+    public interface TriFunction<A, B, C, R> {
+        R apply(A a, B b, C c);
+    }
+
+    protected <OPN extends OpN> Op transformOpN(
+            OPN op,
+            List<Op> subOps,
+            BiFunction<OPN, List<Op>, ? extends Op> fallback) {
+        return fallback.apply(op, subOps);
+    }
+
+    protected <OP2 extends Op2> Op transformOp2(
+            OP2 op,
+            Op left,
+            Op right,
+            TriFunction<OP2, Op, Op, ? extends Op> fallback) {
+        return fallback.apply(op, left, right);
+    }
+
+    protected <OP1 extends Op1> Op transformOp1(
+            OP1 op,
+            Op subOp,
+            BiFunction<OP1, Op, ? extends Op> fallback) {
+        return fallback.apply(op, subOp);
+    }
+
+    protected Op execOpExt(
+            OpExt opExt,
+            Function<OpExt, ? extends Op> fallback) {
+        return fallback.apply(opExt);
+    }
+
+    // Op1
+    @Override
+    public Op transform(OpFilter op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpGraph op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpProcedure op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpPropFunc op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpLabel op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpAssign op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpExtend op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    // Op2
+    @Override
+    public Op transform(OpJoin op, Op left, Op right) {
+        return transformOp2(op, left, right, super::transform);
+    }
+
+    @Override
+    public Op transform(OpLeftJoin op, Op left, Op right) {
+        return transformOp2(op, left, right, super::transform);
+    }
+
+    @Override
+    public Op transform(OpDiff op, Op left, Op right) {
+        return transformOp2(op, left, right, super::transform);
+    }
+
+    @Override
+    public Op transform(OpMinus op, Op left, Op right) {
+        return transformOp2(op, left, right, super::transform);
+    }
+
+    @Override
+    public Op transform(OpUnion op, Op left, Op right) {
+        return transformOp2(op, left, right, super::transform);
+    }
+
+    @Override
+    public Op transform(OpConditional op, Op left, Op right) {
+        return transformOp2(op, left, right, super::transform);
+    }
+
+    // OpN
+    @Override
+    public Op transform(OpSequence op, List<Op> elts) {
+        return transformOpN(op, elts, super::transform);
+    }
+
+    @Override
+    public Op transform(OpDisjunction op, List<Op> elts) {
+        return transformOpN(op, elts, super::transform);
+    }
+
+    // Extensions
+    @Override
+    public Op transform(OpExt opExt) {
+        return execOpExt(opExt, super::transform);
+    }
+
+    // OpModifier
+    @Override
+    public Op transform(OpList op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpOrder op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpTopN op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpProject op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpDistinct op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpReduced op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpSlice op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+
+    @Override
+    public Op transform(OpGroup op, Op subOp) {
+        return transformOp1(op, subOp, super::transform);
+    }
+}
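Not part of the patch: a minimal usage sketch. The adapter is driven through Jena's `Transformer`; the query and the override are made up for illustration, and the single generic hook intercepts every unary operator at once.

```java
import java.util.function.BiFunction;

import org.apache.jena.query.QueryFactory;
import org.apache.jena.sparql.algebra.Algebra;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.Transformer;
import org.apache.jena.sparql.algebra.op.Op1;

public class TransformCopyAdapterDemo {
    public static void main(String[] args) {
        Op op = Algebra.compile(QueryFactory.create(
            "SELECT DISTINCT ?s { ?s ?p ?o FILTER(?o > 5) }"));

        // One override sees project, distinct and filter alike
        Op copy = Transformer.transform(new TransformCopyAdapter() {
            @Override
            protected <OP1 extends Op1> Op transformOp1(OP1 op, Op subOp,
                    BiFunction<OP1, Op, ? extends Op> fallback) {
                System.out.println("Visiting " + op.getName());
                return fallback.apply(op, subOp); // default: plain copy
            }
        }, op);

        System.out.println(copy);
    }
}
```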
diff --git a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformEvalTable.java b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformEvalTable.java
new file mode 100644
index 000000000..342ee1075
--- /dev/null
+++ b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformEvalTable.java
@@ -0,0 +1,114 @@
+package org.aksw.jena_sparql_api.algebra.transform;
+
+import java.util.List;
+import java.util.function.BiFunction;
+
+import org.apache.jena.query.ARQ;
+import org.apache.jena.sparql.ARQConstants;
+import org.apache.jena.sparql.algebra.Op;
+import org.apache.jena.sparql.algebra.Table;
+import org.apache.jena.sparql.algebra.op.Op1;
+import org.apache.jena.sparql.algebra.op.Op2;
+import org.apache.jena.sparql.algebra.op.OpN;
+import org.apache.jena.sparql.algebra.op.OpTable;
+import org.apache.jena.sparql.algebra.table.TableN;
+import org.apache.jena.sparql.core.DatasetGraph;
+import org.apache.jena.sparql.core.DatasetGraphFactory;
+import org.apache.jena.sparql.engine.ExecutionContext;
+import org.apache.jena.sparql.engine.QueryIterator;
+import org.apache.jena.sparql.engine.iterator.QueryIterRoot;
+import org.apache.jena.sparql.engine.main.OpExecutor;
+import org.apache.jena.sparql.engine.main.OpExecutorFactory;
+import org.apache.jena.sparql.util.Context;
+import org.apache.jena.sparql.util.NodeFactoryExtra;
+
+/**
+ * A transformer that evaluates every op whose sub-ops are all {@link OpTable} instances
+ * to another {@link OpTable} instance.
+ * Note that this mechanism materializes every intermediate result as a table.
+ *
+ * A more efficient approach may be to evaluate sub-expressions of an op using an {@link OpExecutor} and
+ * substituting the roots of these sub-expressions in op with the obtained results.
+ *
+ * @author raven
+ *
+ */
+public class TransformEvalTable
+    extends TransformCopyAdapter
+{
+    protected OpExecutor opExecutor;
+    protected ExecutionContext execCxt;
+
+    public TransformEvalTable(OpExecutor opExecutor, ExecutionContext execCxt) {
+        super();
+        this.opExecutor = opExecutor;
+        this.execCxt = execCxt;
+    }
+
+    public static TransformEvalTable create() {
+        OpExecutorFactory opExecutorFactory = OpExecutor.stdFactory;
+        ExecutionContext execCxt = createExecCxt(opExecutorFactory);
+        OpExecutor opExecutor = opExecutorFactory.create(execCxt);
+
+        return new TransformEvalTable(opExecutor, execCxt);
+    }
+
+    //protected QueryIterRoot
+    public static ExecutionContext createExecCxt(OpExecutorFactory opExecutorFactory) {
+        Context cxt = ARQ.getContext().copy() ;
+        cxt.set(ARQConstants.sysCurrentTime, NodeFactoryExtra.nowAsDateTime()) ;
+        DatasetGraph dataset = DatasetGraphFactory.create();
+        ExecutionContext execCxt = new ExecutionContext(cxt, dataset.getDefaultGraph(), dataset, opExecutorFactory);
+
+        return execCxt;
+    }
+
+    public OpTable exec(Op op) {
+        QueryIterator queryIter = opExecutor.executeOp(op, QueryIterRoot.create(execCxt));
+        Table table = new TableN(queryIter);
+        return OpTable.create(table);
+    }
+
+    @Override
+    protected <OPN extends OpN> Op transformOpN(OPN op, List<Op> subOps,
+            BiFunction<OPN, List<Op>, ? extends Op> fallback) {
+        Op result;
+
+        boolean isAllTables = subOps.stream().allMatch(subOp -> subOp instanceof OpTable);
+        if (isAllTables) {
+            Op tmp = op.copy(subOps);
+            result = exec(tmp);
+        } else {
+            result = fallback.apply(op, subOps);
+        }
+
+        return result;
+    }
+
+    @Override
+    protected <OP2 extends Op2> Op transformOp2(OP2 op, Op left, Op right,
+            TriFunction<OP2, Op, Op, ? extends Op> fallback) {
+        Op result;
+
+        if (left instanceof OpTable && right instanceof OpTable) {
+            Op tmp = op.copy(left, right);
+            result = exec(tmp);
+        } else {
+            result = fallback.apply(op, left, right);
+        }
+
+        return result;
+    }
+
+    @Override
+    protected <OP1 extends Op1> Op transformOp1(OP1 op, Op subOp,
+            BiFunction<OP1, Op, ? extends Op> fallback) {
+        Op result = null;
+
+        if (subOp instanceof OpTable) {
+            Op tmp = op.copy(subOp);
+            result = exec(tmp);
+        } else {
+            result = fallback.apply(op, subOp);
+        }
+
+        return result;
+    }
+}
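A usage sketch, not part of the patch: applied bottom-up by Jena's `Transformer`, a tree whose leaves are all inline tables folds into a single table. The SSE expression is made up for illustration.

```java
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.Transformer;
import org.apache.jena.sparql.sse.SSE;

public class TransformEvalTableDemo {
    public static void main(String[] args) {
        // A join of two inline tables; both sub-ops are OpTable instances
        Op op = SSE.parseOp(
            "(join (table (vars ?s) (row [?s 1])) (table (vars ?o) (row [?o 2])))");

        // Bottom-up application collapses the join into a single (table ...) op
        Op folded = Transformer.transform(TransformEvalTable.create(), op);
        System.out.println(folded);
    }
}
```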
diff --git a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformExpandAggCountDistinct.java b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformExpandAggCountDistinct.java
new file mode 100644
index 000000000..a6b6e9926
--- /dev/null
+++ b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformExpandAggCountDistinct.java
@@ -0,0 +1,52 @@
+package org.aksw.jena_sparql_api.algebra.transform;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.jena.sparql.algebra.Op;
+import org.apache.jena.sparql.algebra.TransformCopy;
+import org.apache.jena.sparql.algebra.op.OpDistinct;
+import org.apache.jena.sparql.algebra.op.OpGroup;
+import org.apache.jena.sparql.core.Var;
+import org.apache.jena.sparql.expr.ExprAggregator;
+import org.apache.jena.sparql.expr.aggregate.AggCount;
+import org.apache.jena.sparql.expr.aggregate.AggCountDistinct;
+import org.apache.jena.sparql.expr.aggregate.Aggregator;
+
+/**
+ * Transform GROUP([count(distinct *) AS ?.0], subOp)
+ * to GROUP([count(*) AS ?.0], DISTINCT(subOp))
+ *
+ * Used to mitigate a bug in Virtuoso
+ *
+ * @author raven
+ *
+ */
+public class TransformExpandAggCountDistinct
+    extends TransformCopy
+{
+    @Override
+    public Op transform(OpGroup op, Op subOp) {
+
+        Op tmp = null;
+
+        List<ExprAggregator> eas = op.getAggregators();
+        if (eas.size() == 1) {
+            ExprAggregator ea = eas.get(0);
+            Var ev = ea.getVar();
+            Aggregator a = ea.getAggregator();
+            if (a instanceof AggCountDistinct) {
+                tmp = new OpGroup(
+                    new OpDistinct(subOp),
+                    op.getGroupVars(),
+                    Collections.singletonList(new ExprAggregator(ev, new AggCount())));
+            }
+        }
+
+        Op result = tmp != null
+            ? tmp
            : super.transform(op, subOp);

        return result;
    }
}
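Not part of the patch, a sketch of the intended effect (assuming application via `Transformer` and ARQ's default aggregator variable allocation):

```java
import org.apache.jena.query.QueryFactory;
import org.apache.jena.sparql.algebra.Algebra;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.Transformer;

public class ExpandAggCountDistinctDemo {
    public static void main(String[] args) {
        Op op = Algebra.compile(QueryFactory.create(
            "SELECT (COUNT(DISTINCT *) AS ?c) { ?s ?p ?o }"));

        // (group () ((?.0 (count distinct))) (bgp ...)) becomes
        // (group () ((?.0 (count))) (distinct (bgp ...)))
        Op rewritten = Transformer.transform(new TransformExpandAggCountDistinct(), op);
        System.out.println(rewritten);
    }
}
```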
diff --git a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformExprToBasicPattern.java b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformExprToBasicPattern.java
index 7f54724bb..71986964b 100644
--- a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformExprToBasicPattern.java
+++ b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformExprToBasicPattern.java
@@ -118,7 +118,7 @@ public Expr doTransform(ExprFunction func, Expr arg) {
                 Node p = NodeFactory.createURI(substInfo.getKey());
                 boolean subjectAsOutput = substInfo.getValue();
 
-                Triple t = TripleUtils.create(argNode, p, v, subjectAsOutput);
+                Triple t = TripleUtils.create(argNode, p, v, !subjectAsOutput);
                 triples.add(t);
 
                 result = new ExprVar(v);
diff --git a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformFactorizeTableColumnsToExtend.java b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformFactorizeTableColumnsToExtend.java
new file mode 100644
index 000000000..69333af33
--- /dev/null
+++ b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/transform/TransformFactorizeTableColumnsToExtend.java
@@ -0,0 +1,130 @@
+package org.aksw.jena_sparql_api.algebra.transform;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.jena.ext.com.google.common.collect.Sets;
+import org.apache.jena.graph.Node;
+import org.apache.jena.query.ResultSet;
+import org.apache.jena.sparql.algebra.Op;
+import org.apache.jena.sparql.algebra.Table;
+import org.apache.jena.sparql.algebra.TransformCopy;
+import org.apache.jena.sparql.algebra.op.OpExtend;
+import org.apache.jena.sparql.algebra.op.OpProject;
+import org.apache.jena.sparql.algebra.op.OpTable;
+import org.apache.jena.sparql.core.Var;
+import org.apache.jena.sparql.core.VarExprList;
+import org.apache.jena.sparql.engine.binding.Binding;
+import org.apache.jena.sparql.expr.NodeValue;
+
+/**
+ * Given a table, extract every variable that maps to the same constant
+ * in all rows into a BIND block:
+ *
+ * Given:
+ *
+ * VALUES (?x ?y) {
+ *   (a b1)
+ *   (a b2)
+ * }
+ *
+ * it becomes:
+ *
+ * VALUES (?y) { (b1) (b2) }
+ * BIND(a AS ?x)
+ *
+ */
+public class TransformFactorizeTableColumnsToExtend
+    extends TransformCopy
+{
+    /**
+     * Virtuoso does not support an empty table - whereas jena does.
+     * I.e. the following works with jena but not with virtuoso:
+     *
+     * SELECT * { VALUES () { () () () } BIND ( AS ?x) }
+     *
+     */
+    protected boolean preventEmptyTable = true;
+
+    @Override
+    public Op transform(OpTable opTable) {
+        Table table = opTable.getTable();
+        Map<Var, Node> constants = extractConstants(table.toResultSet());
+
+        Set<Var> tableVars = new LinkedHashSet<>(table.getVars());
+        Set<Var> constantVars = constants.keySet();
+
+        if (constantVars.containsAll(tableVars) && preventEmptyTable && !tableVars.isEmpty()) {
+            // The following statement implicitly removes the entry from 'constants'
+            constantVars.remove(tableVars.iterator().next());
+        }
+
+        Op result;
+        if (constants.isEmpty()) {
+            result = opTable;
+        } else {
+            VarExprList vel = new VarExprList();
+
+//            VarExprListUtils.createFromMap(map)
+            for (Entry<Var, Node> e : constants.entrySet()) {
+                vel.add(e.getKey(), NodeValue.makeNode(e.getValue()));
+            }
+
+            List<Var> remainingTableVars = new ArrayList<>(Sets.difference(tableVars, constantVars));
+            OpTable newTable = TransformEvalTable.create().exec(new OpProject(opTable, remainingTableVars));
+
+            result = OpExtend.extend(newTable, vel);
+        }
+
+        return result;
+    }
+
+
+    /**
+     * Yield all variable-value pairs where the variable is mapped to the same
+     * value across all given bindings. The value may be null.
+     *
+     * @param rs
+     * @return
+     */
+    public static Map<Var, Node> extractConstants(ResultSet rs) {
+        Set<Var> candVars = rs.getResultVars().stream()
+            .map(Var::alloc).collect(Collectors.toSet());
+
+        Map<Var, Node> result = new HashMap<>();
+        while (rs.hasNext()) {
+            Binding b = rs.nextBinding();
+
+            Iterator<Var> itVar = candVars.iterator();
+            while (itVar.hasNext()) {
+                Var v = itVar.next();
+                Node n = b.get(v);
+
+                // We need to use containsKey because null values are allowed
+                if (result.containsKey(v)) {
+                    Node prev = result.get(v);
+                    if (!Objects.equals(prev, n)) {
+                        itVar.remove();
+                        result.remove(v);
+
+                        if (candVars.isEmpty()) {
+                            break;
+                        }
+                    }
+                } else {
+                    result.put(v, n);
+                }
+            }
+        }
+
+        return result;
+    }
+}
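A usage sketch, not part of the patch; the `:`-prefixed IRIs rely on SSE's built-in example prefix and are purely illustrative:

```java
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.Transformer;
import org.apache.jena.sparql.sse.SSE;

public class FactorizeTableDemo {
    public static void main(String[] args) {
        // ?x maps to the constant :a in every row
        Op op = SSE.parseOp(
            "(table (vars ?x ?y) (row [?x :a] [?y :b1]) (row [?x :a] [?y :b2]))");

        // Expected shape: (extend ((?x :a)) (table (vars ?y) ...))
        Op result = Transformer.transform(new TransformFactorizeTableColumnsToExtend(), op);
        System.out.println(result);
    }
}
```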
diff --git a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/utils/FixpointIteration.java b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/utils/FixpointIteration.java
index 6337bc080..03a83c5ee 100644
--- a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/utils/FixpointIteration.java
+++ b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/utils/FixpointIteration.java
@@ -8,34 +8,39 @@
 public class FixpointIteration {
-    private static Logger logger = LoggerFactory.getLogger(FixpointIteration.class);
-
-    public static <T> T apply(T op, Function<T, T> transform) {
-        T current;
-        do {
-            current = op;
-            op = transform.apply(current);
-        } while(!current.equals(op));
-
-        return current;
-    }
-
-    public static <T> T apply(int max, T init, Function<T, T> fn) {
-        T result = init;
-
-        int i = 0;
-        for(; i < max; ++i) {
-            T tmp = fn.apply(result);
-            if(Objects.equals(tmp, result)) {
-                break;
-            }
-            result = tmp;
-        }
-
-        if(i >= max) {
-            logger.warn("Fixpoint iteration reached iteration threshold");
-        }
-
-        return result;
-    }
+    private static Logger logger = LoggerFactory.getLogger(FixpointIteration.class);
+
+    public static <T> Function<T, T> createClosure(Function<T, T> transform) {
+        return op -> apply(op, transform);
+    }
+
+
+    public static <T> T apply(T op, Function<T, T> transform) {
+        T current;
+        do {
+            current = op;
+            op = transform.apply(current);
+        } while(!current.equals(op));
+
+        return current;
+    }
+
+    public static <T> T apply(int max, T init, Function<T, T> fn) {
+        T result = init;
+
+        int i = 0;
+        for(; i < max; ++i) {
+            T tmp = fn.apply(result);
+            if(Objects.equals(tmp, result)) {
+                break;
+            }
+            result = tmp;
+        }
+
+        if(i >= max) {
+            logger.warn("Fixpoint iteration reached iteration threshold");
+        }
+
+        return result;
+    }
 }
diff --git a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/utils/VirtualPartitionedQuery.java b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/utils/VirtualPartitionedQuery.java
index c8ce6190c..42215f52b 100644
--- a/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/utils/VirtualPartitionedQuery.java
+++ b/jena-sparql-api-algebra/src/main/java/org/aksw/jena_sparql_api/algebra/utils/VirtualPartitionedQuery.java
@@ -3,6 +3,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -17,6 +18,7 @@
 import org.aksw.jena_sparql_api.concepts.RelationUtils;
 import org.aksw.jena_sparql_api.concepts.TernaryRelation;
 import org.aksw.jena_sparql_api.concepts.TernaryRelationImpl;
+import org.aksw.jena_sparql_api.concepts.UnaryRelation;
 import org.aksw.jena_sparql_api.concepts.XExpr;
 import org.aksw.jena_sparql_api.utils.ElementUtils;
 import org.aksw.jena_sparql_api.utils.NodeTransformRenameMap;
@@ -46,7 +48,7 @@
 //class Contrib {
 //    protected BinaryRelation reachingRelation;
 //    protected TernaryRelation graphRelation;
-//
+//
 //    public Contrib(BinaryRelation reachingRelation, TernaryRelation graphRelation) {
 //        super();
 //        this.reachingRelation = reachingRelation;
@@ -83,7 +85,7 @@
 //class PathResolverSimple<P>
 //    implements PathResolver
 //{
-//
+//
 //    @Override
 //    public P parent() {
 //        // TODO Auto-generated method stub
 //        return null;
@@ -97,12 +99,12 @@
 //
 //    @Override
 //    public BinaryRelation value() {
-//
-//
+//
+//
 //        // TODO Auto-generated method stub
 //        return null;
 //    }
-//
+//
 //}
@@ -123,46 +125,46 @@
  * In a virtual RDF graph, the naming of the variables is meaningless anyway,
  * as the rewriting system on top just cares about subject, predicate and object positions
  * but not how they are named.
- *
- *
+ *
+ *
  * @author raven
  *
  */
 public class VirtualPartitionedQuery {
-    private static final Logger logger = LoggerFactory.getLogger(VirtualPartitionedQuery.class);
-
+    private static final Logger logger = LoggerFactory.getLogger(VirtualPartitionedQuery.class);
+
 //
 //    public static void rewrite(Collection views, Iterable> aliasedPath) {
 //        // Rewrite a path over a collection of partitioned query views
-//
-//
-//
+//
+//
+//
 //        //return null;
 //    }
-//
+//
 //    public void step(Collection views, P_Path0 step, String alias) {
 //        for(PartitionedQuery1 pq : views) {
-//
+//
 //        }
 //    }
 //
-//
+//
 //    // Note: The code below may not work with literals in the template due to
 //    // jena not allowing literals to act as resources
 //    // but actually its a pointless limitation for our purposes
 //    public Resolver createResolver(PartitionedQuery1 pq, Iterable path) {
 //        Node rootNode = pq.getPartitionVar();
-//
+//
 //        Query query = pq.getQuery();
 //        Template template = query.getConstructTemplate();
 //        GraphVar graphVar = new GraphVarImpl(GraphFactory.createDefaultGraph());
 //        GraphUtil.add(graphVar, template.getTriples());
 //        Model model = ModelFactory.createModelForGraph(graphVar);
-//
+//
 //        Resource root = model.getRDFNode(rootNode).asResource();
 //        System.out.println(root.listProperties().toList());
 //
-//        Collection starts = Collections.singleton(root);
+//        Collection starts = Collections.singleton(root);
 //        for(P_Path0 step : path) {
 ////            Property p = ResourceUtils.getProperty(step);
 //            List targets =
@@ -171,120 +173,167 @@ public class VirtualPartitionedQuery {
 //                .collect(Collectors.toList());
 //            starts = targets;
 //        }
-//
-//
+//
+//
 //        //Element basePattern = query.getQueryPattern();
 //
 //        Set result = starts.stream().map(RDFNode::asNode).collect(Collectors.toSet());
 //        return result;
 //    }
-//
+//
 ////    public static Set resolve(PartitionedQuery1 pq, Collection startVars, P_Path0 step) {
-////
+////
 ////    }
-//
-//
+//
+//
 //    public static Set resolve() {
 //        //Relation baseRelation = RelationImpl.create(basePattern, PatternVars.vars(basePattern));
 //
 //        //FacetedQueryGenerator.createRelationForPath(PathToRelationMapper

mapper, PathAccessor

pathAccessor, P childPath, boolean includeAbsent) { // -// +// // List trs; // for(RDFNode target : targets) { // // Generate the triple pattern (target, p, o) // Var var = (Var)target.asNode(); // System.out.println(var); -// +// // BinaryRelation br = // BinaryRelationImpl.create(var, Vars.p, Vars.o, isFwd) // .joinOn(var).with(new Concept(basePattern, var)) // .toBinaryRelation(); -// -// } +// +// } // } -// -// - - +// +// + + // public static Resolver createResolver(PartitionedQuery1 pq) { // RDFNode node = toRdfModel(pq); // Resolver result = new ResolverTemplate(pq, Collections.singleton(node)); // return result; // } - - + + // public void step(SimplePath basePath, PartitionedQuery1 pq, P_Path0 step, boolean isFwd, String alias) { // System.out.println(root.listProperties().toList()); -// +// // Property p = ResourceUtils.getProperty(step); // List targets = ResourceUtils.listPropertyValues(root, step).toList(); -// +// // Element basePattern = query.getQueryPattern(); // //Relation baseRelation = RelationImpl.create(basePattern, PatternVars.vars(basePattern)); // // //FacetedQueryGenerator.createRelationForPath(PathToRelationMapper

mapper, PathAccessor

pathAccessor, P childPath, boolean includeAbsent) {
 //
-//
+//
 //        List trs;
 //        for(RDFNode target : targets) {
 //            // Generate the triple pattern (target, p, o)
 //            Var var = (Var)target.asNode();
 //            System.out.println(var);
-//
+//
 //            BinaryRelation br =
 //                BinaryRelationImpl.create(var, Vars.p, Vars.o, isFwd)
 //                    .joinOn(var).with(new Concept(basePattern, var))
 //                    .toBinaryRelation();
-//
-//        }
+//
+//        }
 //    }
-//
-////        // Resolve the path to a
+//
+////        // Resolve the path to a
 ////        PathAccessorRdf pathAccessor = new PathAccessorSimplePath();
 ////        PathToRelationMapper mapper = new PathToRelationMapper<>(pathAccessor, "w");
 ////
 ////        basePath.
 ////        mapper.getOverallRelation(path);
-//
+//
 ////        BinaryRelation br =
 ////            BinaryRelationImpl.create(var, Vars.p, Vars.o, isFwd)
 ////                .joinOn(var).with(new Concept(basePattern, var))
 ////                .toBinaryRelation();
-//
-//
-//
+//
+//
+//
 //        System.out.println(ResourceUtils.listPropertyValues(root, step).toList());
 //    }
-    public static TernaryRelation unionTernary(Collection<? extends TernaryRelation> items) {
-        Relation tmp = union(items, Arrays.asList(Vars.s, Vars.p, Vars.o));
-        TernaryRelation result = tmp.toTernaryRelation();
-        return result;
-    }
-
-
-    public static Relation union(Collection<? extends Relation> items, List<Var> proj) {
-        List<Element> elements = items.stream()
-            .map(e -> RelationUtils.rename(e, proj))
-            .map(Relation::getElement)
-            .collect(Collectors.toList());
-
-        Element e = ElementUtils.unionIfNeeded(elements);
-
-        Relation result = new RelationImpl(e, proj);
-        return result;
-    }
-
-
+    public static TernaryRelation unionTernary(Collection<? extends TernaryRelation> items) {
+        Relation tmp = union(items, Arrays.asList(Vars.s, Vars.p, Vars.o));
+        TernaryRelation result = tmp.toTernaryRelation();
+        return result;
+    }
+
+    /**
+     * Create a union from a given collection of unary relations.
+     * If the collection is empty, return a relation with an empty (unit) table.
+     *
+     * TODO Move to ConceptUtils
+     *
+     * @param relations
+     * @return
+     */
+    public static UnaryRelation unionUnary(Collection<? extends UnaryRelation> relations) {
+        Relation tmp = VirtualPartitionedQuery.union(relations, Collections.singletonList(Vars.s));
+        UnaryRelation result = tmp.toUnaryRelation();
+        return result;
+
+//        Set mentionedVars = relations.stream()
+//            .map(Relation::getVarsMentioned)
+//            .flatMap(Collection::stream)
+//            .collect(Collectors.toSet());
+//        UnaryRelation result;
+//
+//        if (relations.isEmpty()) {
+//            result = new Concept(new ElementData(), Vars.x);
+//        } else {
+//
+//            UnaryRelation first = relations.iterator().next();
+//            Var tgtVar = first.getVar();
+//            List tgtVars = Collections.singletonList(tgtVar);
+//
+//            result = VirtualPartitionedQuery.union(relations, tgtVars).toUnaryRelation();
 
+//            List elements = relations.stream()
+//                .map(r -> RelationUtils.rename(r, tgtVars))
+//                .map(Relation::toUnaryRelation)
+//                .map(Relation::getElement)
+//                .collect(Collectors.toList());
 
+//            Element e = ElementUtils.unionIfNeeded(elements);
 
+//            result = new Concept(e, tgtVar);
+//        }
+//        return result;
+    }
+
+    public static Relation union(Collection<? extends Relation> items, List<Var> proj) {
+
+        // TODO Handle the case where items is empty
+        // Option 1: Inject FILTER(false) (but this does not project vars)
+        // Option 2: Inject VALUES(proj) { }
+
+        List<Element> elements = items.stream()
+            .map(e -> RelationUtils.rename(e, proj))
+            .map(Relation::getElement)
+            .collect(Collectors.toList());
+
+        Element e = ElementUtils.unionIfNeeded(elements);
+
+        Relation result = new RelationImpl(e, proj);
+        return result;
+    }
+
+
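To illustrate the two union helpers just added (not part of the patch; the IRIs are made up, and the short-form concept syntax is the one introduced by this patch in the concepts module):

```java
import java.util.Arrays;

import org.aksw.jena_sparql_api.concepts.Concept;
import org.aksw.jena_sparql_api.concepts.UnaryRelation;

public class UnionDemo {
    public static void main(String[] args) {
        UnaryRelation women = Concept.parse("?s { ?s a <http://example.org/Woman> }", null);
        UnaryRelation men = Concept.parse("?s { ?s a <http://example.org/Man> }", null);

        // Both operands are renamed onto ?s before the UNION is formed
        UnaryRelation people = VirtualPartitionedQuery.unionUnary(Arrays.asList(women, men));
        System.out.println(people.getElement());
    }
}
```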
 //    public static Query rewrite(Resolver resolver, boolean isFwd, Query query) {
 //        Collection views = resolver.getContrib(true);
 //
 //        TernaryRelation tr = unionTernary(views);
 ////        System.out.println(tr);
 //
@@ -297,241 +346,241 @@ public static Relation union(Collection items, List pro
 //        GenericLayer layer = GenericLayer.create(tr);
 //
 //        Query raw = ElementTransformTripleRewrite.transform(query, layer, true);
 //        Query result = DataQueryImpl.rewrite(raw, DataQueryImpl.createDefaultRewriter()::rewrite);
 //
 //
 //        return result;
 //    }
-//
-
-    public static Query rewrite(Collection<? extends TernaryRelation> views, Query query) {
+//
+
+    public static Query rewrite(Collection<? extends TernaryRelation> views, Query query) {
 //        Resolver resolver = createResolver(view, viewVar);
 //        Query result = rewrite(resolver, true, query);
-        TernaryRelation tr = unionTernary(views);
+        TernaryRelation tr = unionTernary(views);
 //        System.out.println(tr);
-
-        GenericLayer layer = GenericLayer.create(tr);
-
-        Query raw = ElementTransformTripleRewrite.transform(query, layer, true);
-        logger.debug("Query over View: Raw rewritten query:\n" + raw);
-
-        Query result = QueryUtils.rewrite(raw, AlgebraUtils.createDefaultRewriter()::rewrite);
-        logger.debug("Query over View: Final rewritten query:\n" + result);
-
-        return result;
-    }
-
-
-    /**
-     *
-     * @return The updated partitioned query with the variable set to the target of the path
-     *
-     * TODO Maybe we want to return a PartitionedQuery2 - with source and target var
-     */
-    /*
-    public static PartitionedQuery1 extendQueryWithPath(PartitionedQuery1 base, AliasedPath path) {
-        Var targetVar = Var.alloc("todo-fresh-var");
-
-        ResolverNode node = ResolverNodeImpl.from(base, null);
-        ResolverNode target = node.walk(path);
-
-        Collection rawBrs = target.getPaths();
-
-        // Set the target variable of the paths to the desired alias
+
+        GenericLayer layer = GenericLayer.create(tr);
+
+        Query raw = ElementTransformTripleRewrite.transform(query, layer, true);
+        logger.debug("Query over View: Raw rewritten query:\n" + raw);
+
+        Query result = QueryUtils.rewrite(raw, AlgebraUtils.createDefaultRewriter()::rewrite);
+        logger.debug("Query over View: Final rewritten query:\n" + result);
+
+        return result;
+    }
+
+
+    /**
+     *
+     * @return The updated partitioned query with the variable set to the target of the path
+     *
+     * TODO Maybe we want to return a PartitionedQuery2 - with source and target var
+     */
+    /*
+    public static PartitionedQuery1 extendQueryWithPath(PartitionedQuery1 base, AliasedPath path) {
+        Var targetVar = Var.alloc("todo-fresh-var");
+
+        ResolverNode node = ResolverNodeImpl.from(base, null);
+        ResolverNode target = node.walk(path);
+
+        Collection rawBrs = target.getPaths();
+
+        // Set the target variable of the paths to the desired alias
 //        Collection brs = rawBrs.stream()
 //            .map(br -> RelationUtils.rename(br, Arrays.asList(br.getSourceVar(), targetVar)).toBinaryRelation())
 //            .collect(Collectors.toList());
+
+        for(BinaryRelation br : rawBrs) {
+            System.out.println("Relation: " + br);
+        }
+
+        return null;
+    }
+    */
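A sketch of using rewrite together with toViews (defined next); not part of the patch, IRIs invented for illustration:

```java
import java.util.Collection;

import org.aksw.jena_sparql_api.concepts.TernaryRelation;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryFactory;

public class QueryOverViewsDemo {
    public static void main(String[] args) {
        // A view that publishes a raw predicate under a nicer name
        Collection<TernaryRelation> views = VirtualPartitionedQuery.toViews(QueryFactory.create(
            "CONSTRUCT { ?s <http://example.org/label> ?o } { ?s <http://example.org/rawLabel> ?o }"));

        // A query phrased against the view vocabulary is rewritten onto the base data
        Query rewritten = VirtualPartitionedQuery.rewrite(views,
            QueryFactory.create("SELECT ?s { ?s <http://example.org/label> ?l }"));
        System.out.println(rewritten);
    }
}
```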
+
+    /**
+     * Convert each triple pattern occurring in the template of a SPARQL construct query
+     * into a ternary relation. This is a somewhat poor-man's approach to creating views over RDF data:
+     * An improved rewriter would not treat the triple patterns in isolation, but rather take care of
+     * doing self-join elimination if multiple triple patterns of a view match that of a query.
+     *
+     *
+     */
+    public static Collection<TernaryRelation> toViews(Query query) {
+        if(!query.isConstructType() || query.isConstructQuad()) {
+            throw new RuntimeException("Construct query (without quads) expected");
+        }
+
+        Op op = Algebra.compile(query);
+
+        Set<Var> visibleVars = OpVars.visibleVars(op);
+        Generator<Var> gen = VarGeneratorBlacklist.create(visibleVars);
+
+        Collection<TernaryRelation> result = new ArrayList<>();
+        Template template = query.getConstructTemplate();
+        //BasicPattern bgp = template.getBGP();
+        //TransformReplaceConstants.transform(new OpBGP(bgp));
+
+
+        Element pattern = query.getQueryPattern();
+        for(Triple t : template.getTriples()) {
+            List<Node> nodes = TripleUtils.tripleToList(t);
+            Map<Node, Var> nodeToVar = new HashMap<>();
+            Map<Node, Var> substs = TransformReplaceConstants.transform(nodeToVar, nodes, gen);
+
+            Triple newT = NodeTransformLib.transform(new NodeTransformRenameMap(substs), t);
+
+            Element newE;
+            if(substs.isEmpty()) {
+                newE = pattern;
+            } else {
+                ElementGroup tgt = new ElementGroup();
+                ElementUtils.copyElements(tgt, pattern);
+
+                // Add the BINDs afterwards in order to get a nicer algebra:
+                // We get extend(subOp, bindings) instead of join(extend(unit, bindings), subOp)
+                for(Entry<Node, Var> e : substs.entrySet()) {
+                    tgt.addElement(new ElementBind(e.getValue(), NodeValue.makeNode(e.getKey())));
+                }
+
+                newE = tgt;
+            }
+
+
+            TernaryRelation tr = new TernaryRelationImpl(newE,
+                (Var)newT.getSubject(),
+                (Var)newT.getPredicate(),
+                (Var)newT.getObject());
+
+            result.add(tr);
+        }
+
+        return result;
+    }
+
+
+/*
+    public static void main(String[] args) {
 //CONSTRUCT { ?s ?p ?o } WHERE {?x  ?s . ?x ?p ?o }
+
+
+        if(true) {
+            List views = Arrays.asList(
+                //new TernaryRelationImpl(Concept.parseElement("{ ?s ?p ?o }", null), Vars.s, Vars.p, Vars.o),
+                new TernaryRelationImpl(Concept.parseElement(
+                    "{ ?x  ?s"
+                    + ". ?x ?p ?o }", null), Vars.s, Vars.p, Vars.o)
+            );
+
+            //Query view = QueryFactory.create("CONSTRUCT {?s ?p ?o } { ?s ?pRaw ?o . BIND(URI(CONCAT('http://foobar', STR(?pRaw))) AS ?p) }");
+            //PartitionedQuery1 pq = PartitionedQuery1.from(view, Vars.s);
+            //Resolver resolver = Resolvers.from(pq);
 // FILTER(?s = )
+            String queryStr = "SELECT ?s ?o { ?s a  ;  ?o .
FILTER(?s = )}"; - - Query example1 = rewrite( - views, - QueryFactory.create(queryStr)); - System.out.println("Example 1\n" + example1); - - try(RDFConnection conn = RDFConnectionFactory.connect("https://query.wikidata.org/sparql")) { - - //example1 = DataQueryImpl.rewrite(example1, DataQueryImpl.createDefaultRewriter()::rewrite); - try(QueryExecution qe = conn.query(example1)) { - System.out.println(ResultSetFormatter.asText(qe.execSelect())); - } - } - - return; - } - - - Query view = QueryFactory.create("CONSTRUCT { ?p ?c } { { SELECT ?p (COUNT(?o) AS ?c) { ?s ?p ?o } GROUP BY ?p } }"); - PartitionedQuery1 pq = PartitionedQuery1.from(view, Vars.p); - Resolver resolver = Resolvers.from(pq); - - if(false) { - - Query example1 = rewrite( - resolver - .getRdfGraphSpec(true), - QueryFactory.create("SELECT ?x ?y ?z { ?x ?y ?z }")); - System.out.println("Example 1\n" + example1); - - Query example2 = rewrite( - resolver - .getRdfGraphSpec(true), - QueryFactory.create("SELECT DISTINCT ?y { ?x ?y ?z }")); - System.out.println("Example 2\n" + example2); - - Query example3 = rewrite( - resolver - .resolve(new P_Link(NodeFactory.createURI("http://facetCount"))) - .getRdfGraphSpec(true), - QueryFactory.create("SELECT ?x ?y ?z { ?x ?y ?z }")); - System.out.println("Example 3\n" + example3); - - Query example4a = rewrite( - resolver - .resolve(new P_Link(NodeFactory.createURI("http://facetCount"))) - .getRdfGraphSpec(true), - QueryFactory.create("SELECT DISTINCT ?y { ?x ?y ?z }")); - System.out.println("Example 4a\n" + example4a); - Query example4b = rewrite( - resolver - .resolve(new P_Link(NodeFactory.createURI("http://facetCount")), "someAlias") - .getRdfGraphSpec(true), - QueryFactory.create("SELECT DISTINCT ?y { ?x ?y ?z }")); - System.out.println("Example 4b\n" + example4b); - } - - // TODO We may need to tag alias as whether it corresponds to a fixed var name - // or a relative path id + String queryStr = "SELECT ?s ?o { ?s a ; ?o . 
FILTER(?s = )}"; + + Query example1 = rewrite( + views, + QueryFactory.create(queryStr)); + System.out.println("Example 1\n" + example1); + + try(RDFConnection conn = RDFConnectionFactory.connect("https://query.wikidata.org/sparql")) { + + //example1 = DataQueryImpl.rewrite(example1, DataQueryImpl.createDefaultRewriter()::rewrite); + try(QueryExecution qe = conn.query(example1)) { + System.out.println(ResultSetFormatter.asText(qe.execSelect())); + } + } + + return; + } + + + Query view = QueryFactory.create("CONSTRUCT { ?p ?c } { { SELECT ?p (COUNT(?o) AS ?c) { ?s ?p ?o } GROUP BY ?p } }"); + PartitionedQuery1 pq = PartitionedQuery1.from(view, Vars.p); + Resolver resolver = Resolvers.from(pq); + + if(false) { + + Query example1 = rewrite( + resolver + .getRdfGraphSpec(true), + QueryFactory.create("SELECT ?x ?y ?z { ?x ?y ?z }")); + System.out.println("Example 1\n" + example1); + + Query example2 = rewrite( + resolver + .getRdfGraphSpec(true), + QueryFactory.create("SELECT DISTINCT ?y { ?x ?y ?z }")); + System.out.println("Example 2\n" + example2); + + Query example3 = rewrite( + resolver + .resolve(new P_Link(NodeFactory.createURI("http://facetCount"))) + .getRdfGraphSpec(true), + QueryFactory.create("SELECT ?x ?y ?z { ?x ?y ?z }")); + System.out.println("Example 3\n" + example3); + + Query example4a = rewrite( + resolver + .resolve(new P_Link(NodeFactory.createURI("http://facetCount"))) + .getRdfGraphSpec(true), + QueryFactory.create("SELECT DISTINCT ?y { ?x ?y ?z }")); + System.out.println("Example 4a\n" + example4a); + Query example4b = rewrite( + resolver + .resolve(new P_Link(NodeFactory.createURI("http://facetCount")), "someAlias") + .getRdfGraphSpec(true), + QueryFactory.create("SELECT DISTINCT ?y { ?x ?y ?z }")); + System.out.println("Example 4b\n" + example4b); + } + + // TODO We may need to tag alias as whether it corresponds to a fixed var name + // or a relative path id // System.out.println( // resolver -// .resolve(new P_Link(NodeFactory.createURI("http://facetCount")), "p") -// .resolve(new P_Link(NodeFactory.createURI("http://label")), "labelAlias") +// .resolve(new P_Link(NodeFactory.createURI("http://facetCount")), "p") +// .resolve(new P_Link(NodeFactory.createURI("http://label")), "labelAlias") // .getPaths()); - AliasedPath path = PathBuilderNode.start() - .fwd("http://facetCount").viaAlias("a") - .fwd("http://label").one()//viaAlias("b") - .aliasedPath(); - - if(false) { - path = PathBuilderNode.start() - .fwd("http://facetCount").one() - .fwd("http://label").one() - .aliasedPath(); - } - - System.out.println("built path: " + path); - - - // High level API: + AliasedPath path = PathBuilderNode.start() + .fwd("http://facetCount").viaAlias("a") + .fwd("http://label").one()//viaAlias("b") + .aliasedPath(); + + if(false) { + path = PathBuilderNode.start() + .fwd("http://facetCount").one() + .fwd("http://label").one() + .aliasedPath(); + } + + System.out.println("built path: " + path); + + + // High level API: // System.out.println("Paths: " + (ResolverNode.from(resolver) // .fwd("http://facetCount").viaAlias("a") // .fwd("http://label").viaAlias("b") // .getPaths()); - - System.out.println(pq); - extendQueryWithPath(pq, path); - + + System.out.println(pq); + extendQueryWithPath(pq, path); + // // System.out.println(resolver -// .resolve(new P_Link(NodeFactory.createURI("http://facetCount"))) +// .resolve(new P_Link(NodeFactory.createURI("http://facetCount"))) // .getPaths()); - } - */ - - static class GeneralizedStep { - boolean isFwd; - XExpr expr; - } - - 
 	//processor.step(pq, new P_Link(NodeFactory.createURI("http://facetCount")), true, "a");
-
-
-	//VirtualPartitionedQuery processor = new VirtualPartitionedQuery();
-
+
 // Query query = QueryFactory.create("CONSTRUCT { ?city  ?mayor . ?mayor  ?party } { ?city  ?mayor . ?mayor  ?party }");
diff --git a/jena-sparql-api-algebra/src/test/java/org/aksw/jena_sparql_api/core/utils/QueryGenerationUtilsTests.java b/jena-sparql-api-algebra/src/test/java/org/aksw/jena_sparql_api/core/utils/QueryGenerationUtilsTests.java
index 65deda13d..7f47691a4 100644
--- a/jena-sparql-api-algebra/src/test/java/org/aksw/jena_sparql_api/core/utils/QueryGenerationUtilsTests.java
+++ b/jena-sparql-api-algebra/src/test/java/org/aksw/jena_sparql_api/core/utils/QueryGenerationUtilsTests.java
@@ -102,6 +102,7 @@ public void testProject2() {
 	public void testOptimizeAggToDistinctX() {
 		Query query = QueryFactory.create("SELECT (?s AS ?x) ?o { ?s ?p ?o } GROUP BY ?s ?o");
 //		QueryGenerationUtils.optimizeAggregationToDistinct(query);
+		System.out.println("TODO Validate testOptimizeAggToDistinctX");
 		System.out.println(QueryGenerationUtils.analyzeDistinctVarSets(query));
 		System.out.println(query);
 	}
@@ -160,8 +161,6 @@ public void testCountQueryGenerationA1() {
 		eval(
 			"SELECT ?s ?o { ?s ?p ?o }",
 			input -> {
-				Collection<Var> vars = Arrays.asList(s);
-				// Expected SELECT (COUNT(*) AS ?c_1) { ?s ?p ?o }
 				Entry<Var, Query> count = QueryGenerationUtils.createQueryCountCore(input, null, null);
 				return count.getValue();
 			},
@@ -199,7 +198,6 @@ public void testCountQueryGenerationA4() {
 		eval(
 			"SELECT ?s (AVG(?o) AS ?c) { ?s ?p ?o } GROUP BY ?s ?p",
 			input -> {
-				Collection<Var> vars = Arrays.asList(s, Var.alloc("c"));
 				Entry<Var, Query> count = QueryGenerationUtils.createQueryCountCore(input, null, null);
 				return count.getValue();
 			},
@@ -306,4 +304,33 @@ public void testCountQueryGenerationForLsqBug() {
 		);
 	}
 
+	@Test
+	public void testCountQueryGenerationForLimitBug() {
+		eval(
+			"SELECT * { ?s ?p ?o } LIMIT 100 OFFSET 5",
+			input -> {
+				Entry<Var, Query> count = QueryGenerationUtils.createQueryCountCore(input, 50l, null);
+				return count.getValue();
+			},
+			"SELECT (COUNT(*) AS ?c_1) { SELECT * { ?s ?p ?o } LIMIT 50 OFFSET 5 }"
+		);
+	}
+
+	/**
+	 * Queries with only constants caused an exception in the rewrite, stating that
+	 * variables are required.
+	 *
+	 */
+	@Test
+	public void testCountQueryGenerationForLsqBug2() {
+		eval(
+			"SELECT * { }",
+			input -> {
+				Entry<Var, Query> count = QueryGenerationUtils.createQueryCountCore(input, 1000l, null);
+				return count.getValue();
+			},
+			"SELECT (COUNT(*) AS ?c_1) { SELECT * { } LIMIT 1000 }"
+		);
+	}
+
 }
diff --git a/jena-sparql-api-batch/pom.xml b/jena-sparql-api-batch/pom.xml
index 73bc8cf4b..de74a6bc7 100644
--- a/jena-sparql-api-batch/pom.xml
+++ b/jena-sparql-api-batch/pom.xml
@@ -11,7 +11,7 @@
 	<parent>
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
 	</parent>
diff --git a/jena-sparql-api-batch/src/main/java/org/aksw/jena_sparql_api/batch/cli/main/DatasetGraphDiff.java b/jena-sparql-api-batch/src/main/java/org/aksw/jena_sparql_api/batch/cli/main/DatasetGraphDiff.java
deleted file mode 100644
index 248d7c45f..000000000
--- a/jena-sparql-api-batch/src/main/java/org/aksw/jena_sparql_api/batch/cli/main/DatasetGraphDiff.java
+++ /dev/null
@@ -1,157 +0,0 @@
-package org.aksw.jena_sparql_api.batch.cli.main;
-
-import
java.util.Iterator; - -import com.google.common.base.Predicate; -import com.google.common.collect.Iterators; -import org.apache.jena.graph.Graph; -import org.apache.jena.graph.Node; -import org.apache.jena.query.ReadWrite; -import org.apache.jena.query.TxnType; -import org.apache.jena.sparql.core.DatasetGraph; -import org.apache.jena.sparql.core.DatasetGraphBase; -import org.apache.jena.sparql.core.Quad; - -class PredicateIfQuadExists - implements Predicate -{ - protected DatasetGraph datasetGraph; - - public PredicateIfQuadExists(DatasetGraph datasetGraph) { - super(); - this.datasetGraph = datasetGraph; - } - - @Override - public boolean apply(Quad quad) { - boolean result = datasetGraph.contains(quad); - return result; - } -} - -public class DatasetGraphDiff - extends DatasetGraphBase -{ - protected DatasetGraph core; - - protected DatasetGraph added; - protected DatasetGraph removed; - - - @Override - public Iterator find(Node g, Node s, Node p, Node o) { - Predicate pred = new PredicateIfQuadExists(removed); - - Iterator itAdded = added.find(g, s, p, o); - - Iterator result = core.find(g, s, p, o); - result = Iterators.filter(result, pred); - result = Iterators.concat(result, itAdded); - - return result; - } - - @Override - public Iterator findNG(Node g, Node s, Node p, Node o) { - Predicate pred = new PredicateIfQuadExists(removed); - - Iterator itAdded = added.findNG(g, s, p, o); - - Iterator result = core.findNG(g, s, p, o); - result = Iterators.filter(result, pred); - result = Iterators.concat(result, itAdded); - - return result; - } - - @Override - public Iterator listGraphNodes() { - Iterator result = core.listGraphNodes(); - return result; - } - - @Override - public void addGraph(Node arg0, Graph arg1) { - } - - @Override - public Graph getDefaultGraph() { - return null; -// Graph result = core.getDefaultGraph(); -// return result; - } - - @Override - public Graph getGraph(Node arg0) { - return null; -// Graph result = core.getDefaultGraph(); -// return result; - } - - @Override - public void removeGraph(Node arg0) { - // TODO Auto-generated method stub - - } - - @Override - public boolean supportsTransactions() { - // TODO Auto-generated method stub - return false; - } - - @Override - public void abort() { - // TODO Auto-generated method stub - - } - - @Override - public void begin(ReadWrite arg0) { - // TODO Auto-generated method stub - - } - - @Override - public void commit() { - // TODO Auto-generated method stub - - } - - @Override - public void end() { - // TODO Auto-generated method stub - - } - - @Override - public boolean isInTransaction() { - // TODO Auto-generated method stub - return false; - } - - @Override - public void begin(TxnType type) { - // TODO Auto-generated method stub - - } - - @Override - public boolean promote(Promote mode) { - // TODO Auto-generated method stub - return false; - } - - @Override - public ReadWrite transactionMode() { - // TODO Auto-generated method stub - return null; - } - - @Override - public TxnType transactionType() { - // TODO Auto-generated method stub - return null; - } - -} diff --git a/jena-sparql-api-batch/src/main/java/org/aksw/jena_sparql_api/spring/conversion/C_StringToMappedQuery.java b/jena-sparql-api-batch/src/main/java/org/aksw/jena_sparql_api/spring/conversion/C_StringToMappedQuery.java index 937ededde..b065cc77c 100644 --- a/jena-sparql-api-batch/src/main/java/org/aksw/jena_sparql_api/spring/conversion/C_StringToMappedQuery.java +++ 
b/jena-sparql-api-batch/src/main/java/org/aksw/jena_sparql_api/spring/conversion/C_StringToMappedQuery.java
@@ -3,14 +3,14 @@
 import org.aksw.jena_sparql_api.mapper.MappedQuery;
 import org.aksw.jena_sparql_api.mapper.MappedQueryUtils;
 import org.aksw.jena_sparql_api.mapper.PartitionedQuery1;
+import org.aksw.jena_sparql_api.mapper.PartitionedQuery1Impl;
 import org.aksw.jena_sparql_api.stmt.SparqlQueryParser;
 import org.aksw.jena_sparql_api.utils.VarUtils;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.core.convert.converter.Converter;
-
 import org.apache.jena.query.Query;
 import org.apache.jena.sparql.core.DatasetGraph;
 import org.apache.jena.sparql.core.Var;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.core.convert.converter.Converter;
 
 
 @AutoRegistered
@@ -39,7 +39,7 @@ public static PartitionedQuery1 parse(String str, SparqlQueryParser parser) {
         Var var = VarUtils.parseVar(splits[0]);
         Query query = parser.apply(splits[1]);
 
-        PartitionedQuery1 result = new PartitionedQuery1(query, var);
+        PartitionedQuery1 result = new PartitionedQuery1Impl(query, var);
         return result;
     }
diff --git a/jena-sparql-api-cache-h2/pom.xml b/jena-sparql-api-cache-h2/pom.xml
index 2148b401d..efe76ae0d 100644
--- a/jena-sparql-api-cache-h2/pom.xml
+++ b/jena-sparql-api-cache-h2/pom.xml
@@ -10,7 +10,7 @@
 	<parent>
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
 	</parent>
diff --git a/jena-sparql-api-cache/pom.xml b/jena-sparql-api-cache/pom.xml
index bd8fc6cd3..51a555eda 100644
--- a/jena-sparql-api-cache/pom.xml
+++ b/jena-sparql-api-cache/pom.xml
@@ -10,7 +10,7 @@
 	<parent>
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
 	</parent>
diff --git a/jena-sparql-api-collections/pom.xml b/jena-sparql-api-collections/pom.xml
index 77ceeb4ea..1bb4325bd 100644
--- a/jena-sparql-api-collections/pom.xml
+++ b/jena-sparql-api-collections/pom.xml
@@ -13,7 +13,7 @@
 	<parent>
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
 	</parent>
diff --git a/jena-sparql-api-collections/src/main/java/org/aksw/jena_sparql_api/rdf/collections/ConverterFromObjectToLexicalFormViaRDFDatatype.java b/jena-sparql-api-collections/src/main/java/org/aksw/jena_sparql_api/rdf/collections/ConverterFromObjectToLexicalFormViaRDFDatatype.java
new file mode 100644
index 000000000..71a07ece2
--- /dev/null
+++ b/jena-sparql-api-collections/src/main/java/org/aksw/jena_sparql_api/rdf/collections/ConverterFromObjectToLexicalFormViaRDFDatatype.java
@@ -0,0 +1,34 @@
+package org.aksw.jena_sparql_api.rdf.collections;
+
+import org.apache.jena.datatypes.RDFDatatype;
+
+import com.google.common.base.Converter;
+
+/**
+ * A converter between Objects and lexical forms (Strings) via {@link RDFDatatype}.
+ *
+ * @author raven
+ *
+ */
+public class ConverterFromObjectToLexicalFormViaRDFDatatype
+    extends Converter<Object, String>
+{
+    protected RDFDatatype rdfDatatype;
+
+    public ConverterFromObjectToLexicalFormViaRDFDatatype(RDFDatatype rdfDatatype) {
+        super();
+        this.rdfDatatype = rdfDatatype;
+    }
+
+    @Override
+    protected String doForward(Object value) {
+        String result = rdfDatatype.unparse(value);
+        return result;
+    }
+
+    @Override
+    protected Object doBackward(String lexicalForm) {
+        Object result = rdfDatatype.parse(lexicalForm);
+        return result;
+    }
+}
diff --git a/jena-sparql-api-collections/src/main/java/org/aksw/jena_sparql_api/rdf/collections/ResourceUtils.java b/jena-sparql-api-collections/src/main/java/org/aksw/jena_sparql_api/rdf/collections/ResourceUtils.java
index f661da214..691db032c 100644
--- a/jena-sparql-api-collections/src/main/java/org/aksw/jena_sparql_api/rdf/collections/ResourceUtils.java
+++ b/jena-sparql-api-collections/src/main/java/org/aksw/jena_sparql_api/rdf/collections/ResourceUtils.java
@@ -3,7 +3,10 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Optional;
 import java.util.Set;
 import java.util.function.Function;
@@ -1161,4 +1164,28 @@ public static T getPropertyValue(Statement stmt, RDFNodeMapp
         return result;
     }
 
+    /**
+     * A variant of {@link org.apache.jena.util.ResourceUtils#renameResource(Resource, String)}
+     * which renames multiple resources in bulk based on a given map.
+     *
+     * @param rdfNodeToIri
+     * @return A map of the remapped Resources
+     */
+    public static Map<Resource, Resource> renameResources(Map<RDFNode, String> rdfNodeToIri) {
+        Map<Resource, Resource> result = new HashMap<>();
+
+        for(Entry<RDFNode, String> e : rdfNodeToIri.entrySet()) {
+            RDFNode n = e.getKey();
+            String iri = e.getValue();
+
+            if(n.isResource()) {
+                Resource src = n.asResource();
+                Resource tgt = org.apache.jena.util.ResourceUtils.renameResource(src, iri);
+                result.put(src, tgt);
+            }
+        }
+
+        return result;
+    }
+
 }
diff --git a/jena-sparql-api-concepts/README.md b/jena-sparql-api-concepts/README.md
index 1527d0dfc..b7dbcbe28 100644
--- a/jena-sparql-api-concepts/README.md
+++ b/jena-sparql-api-concepts/README.md
@@ -2,12 +2,12 @@
 
 This module contains:
 
-* SPARQL Fragments API: Enables vastly simplified construction and combination of SPARQL graph patterns. Supports constructing JOIN and FILTER operations, and most notably takes care of correct variable renaming.
-  * Main classes are: `Relation` (and subclasses UnaryRelation, BinaryRelation, TernaryRelation)
+* SPARQL Relation Pattern API: Enables vastly simplified construction and combination of SPARQL graph patterns. Supports constructing JOIN and FILTER operations, and most notably takes care of correct variable renaming.
+  * The main classes are: `Relation` (and its subclasses UnaryRelation, BinaryRelation, TernaryRelation)
   * A *Concept* is a pair of a graph pattern and a variable thereof - so it's essentially a SPARQL query with exactly one result column
 * Concept Path Finder: Finds simple paths of RDF properties between given start and end concepts.
 
-Note: At present, things are named Relation, but Fragment might be a better choice.
+A note on naming: For brevity we refer to relation patterns simply as relations, as this is what they describe.
 ```java
 // Create the set ?s, where ?s is a person:
diff --git a/jena-sparql-api-concepts/pom.xml b/jena-sparql-api-concepts/pom.xml
index fcbd1663a..1838b49fc 100644
--- a/jena-sparql-api-concepts/pom.xml
+++ b/jena-sparql-api-concepts/pom.xml
@@ -13,7 +13,7 @@
 	<parent>
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
 	</parent>
diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/Concept.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/Concept.java
index 68e99580f..9c6f4db08 100644
--- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/Concept.java
+++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/Concept.java
@@ -7,6 +7,7 @@
 import org.aksw.jena_sparql_api.stmt.SparqlPrologueParserImpl;
 import org.aksw.jena_sparql_api.stmt.SparqlQueryParser;
 import org.aksw.jena_sparql_api.stmt.SparqlQueryParserImpl;
+import org.aksw.jena_sparql_api.stmt.SparqlQueryParserWrapperSelectShortForm;
 import org.aksw.jena_sparql_api.utils.ElementUtils;
 import org.apache.jena.graph.Triple;
 import org.apache.jena.query.Query;
@@ -30,7 +31,7 @@
  *
  */
 public class Concept
-    implements UnaryRelation
+    implements UnaryRelation
 {
     private Element element;//List<Element> elements;
     private Var var;
@@ -53,15 +54,47 @@ public class Concept
      * Util method to parse strings that use a pipe as a separator between variable and sparql string
      * ?s | ?s a ex:Airport
      *
+     * FIXME This syntax should be replaced with standard SPARQL SELECT where SELECT is omitted:
+     * [SELECT] ?s { ?s a ex:Airport }
+     *
      * @param str
      * @return
      */
     public static Concept parse(String str) {
-        Concept result = parse(str, null);
+        return parse(str, null);
+    }
+
+    public static Concept createFromQuery(Query query) {
+        if (!query.isSelectType()) {
+            throw new RuntimeException("Query must be of select type");
+        }
+
+        if (query.getProjectVars().size() != 1) {
+            throw new RuntimeException("Query must have exactly 1 result variable");
+        }
+
+        Var var = query.getProjectVars().get(0);
+
+        // FIXME Check for aggregators and such
+
+        Concept result = new Concept(query.getQueryPattern(), var);
         return result;
     }
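A short sketch of the two construction paths (illustrative IRI; the short-form string is the one accepted by the new parse method below):

```java
// Equivalent concepts via the two construction paths
Concept direct = Concept.parse("?s { ?s a <http://example.org/Airport> }");

Concept viaQuery = Concept.createFromQuery(
    QueryFactory.create("SELECT ?s { ?s a <http://example.org/Airport> }"));
```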
+
+    public static Concept parse(String str, PrefixMapping pm) {
+        pm = pm == null ? PrefixMapping.Extended : pm;
+
+        SparqlQueryParser parser = SparqlQueryParserWrapperSelectShortForm.wrap(
+            SparqlQueryParserImpl.create(Syntax.syntaxARQ, new Prologue(pm)));
+
+        Query query = parser.apply(str);
+
+        Concept result = createFromQuery(query);
+        return result;
+    }
+
+    public static Concept parseOld(String str, PrefixMapping pm) {
         String[] splits = str.split("\\|", 2);
         if(splits.length != 2) {
             throw new RuntimeException("Invalid string: " + str);
@@ -128,7 +161,7 @@ public static Concept create(String elementStr, String varName, PrefixMapping pr
         return result;
     }
 
-    
+
     public static Element parseElement(String elementStr, PrefixMapping prefixMapping) {
         String tmp = elementStr.trim();
         boolean isEnclosed = tmp.startsWith("{") && tmp.endsWith("}");
@@ -141,7 +174,7 @@ public static Element parseElement(String elementStr, PrefixMapping prefixMappin
         Query query = new Query();
 
         if(prefixMapping != null) {
-            query.setPrefixMapping(prefixMapping);
+            query.setPrefixMapping(prefixMapping);
         }
         // TODO Make parser configurable
         SPARQLParser parser = new ParserSPARQL11();
diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/ConceptUtils.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/ConceptUtils.java
index e02f1eaa6..c29a91330 100644
--- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/ConceptUtils.java
+++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/ConceptUtils.java
@@ -27,6 +27,7 @@
 import org.aksw.jena_sparql_api.utils.Vars;
 import org.apache.jena.ext.com.google.common.collect.Iterables;
 import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
 import org.apache.jena.graph.Triple;
 import org.apache.jena.query.Query;
 import org.apache.jena.query.SortCondition;
@@ -55,6 +56,7 @@
 import org.apache.jena.sparql.syntax.ElementSubQuery;
 import org.apache.jena.sparql.syntax.ElementTriplesBlock;
 import org.apache.jena.sparql.syntax.PatternVars;
+import org.apache.jena.vocabulary.RDF;
 
 import com.google.common.collect.Range;
 import com.google.common.collect.Sets;
@@ -215,7 +217,7 @@ public static UnaryRelation createConceptFromRdfNodes(Iterable nodes) {
+    public static Concept createConcept(Iterable<Node> nodes) {
         ElementData data = new ElementData();
         data.add(Vars.s);
         for(Node node : nodes) {
@@ -322,6 +324,18 @@ public static Concept createSubjectConcept() {
         return result;
     }
 
+    public static Concept createForRdfType(String iriStr) {
+        return createForRdfType(NodeFactory.createURI(iriStr));
+    }
+
+    public static Concept createForRdfType(Node type) {
+        Concept result = new Concept(
+            ElementUtils.createElementTriple(Vars.s, RDF.Nodes.type, type),
+            Vars.s);
+        return result;
+    }
+
+
     public static Map<Var, Var> createDistinctVarMap(Set<Var> workload, Set<Var> blacklist, Generator<Var> generator) {
         //Set<String> varNames = new HashSet<String>(VarUtils.getVarNames(blacklist));
         // Generator<Var> gen = VarGeneratorBlacklist.create(generator, blacklist);
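A usage sketch for the new factory method (example IRI, not part of the patch):

```java
// Builds the concept { ?s a foaf:Person } with ?s as the projection variable
Concept persons = ConceptUtils.createForRdfType("http://xmlns.com/foaf/0.1/Person");
System.out.println(persons);
```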
org.apache.jena.sparql.algebra.Table; +import org.apache.jena.sparql.syntax.Element; +import org.apache.jena.sparql.syntax.ElementData; +import org.apache.jena.sparql.syntax.ElementSubQuery; + +/** + * Interface with several default methods for working which an object that holds + * an {@link Element}. In particular, this interface simplifies extraction + * of the Query or Table when the held Element is of type + * {@link ElementSubQuery} or {@link ElementData}, respectively. + * + * @author raven + * + */ +public interface HasElement { + + /** + * Return the element held by the implementation of this interface + * + * @return + */ + Element getElement(); + + + default boolean holdsQuery() { + Element elt = getElement(); + boolean result = elt instanceof ElementSubQuery; + return result; + } + + default Query extractQuery() { + Element elt = getElement(); + ElementSubQuery tmp = (ElementSubQuery)elt; + Query result = tmp.getQuery(); + return result; + } + + default boolean holdsTable() { + Element elt = getElement(); + boolean result = elt instanceof ElementData; + return result; + } + + default Table extractTable() { + Element elt = getElement(); + ElementData tmp = (ElementData)elt; + Table result = tmp.getTable(); + return result; + } + + default Op toOp() { + Element elt = getElement(); + Op result = Algebra.compile(elt); + return result; + } +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/HasPartitionVars.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/HasPartitionVars.java new file mode 100644 index 000000000..60451d9bd --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/HasPartitionVars.java @@ -0,0 +1,9 @@ +package org.aksw.jena_sparql_api.concepts; + +import java.util.List; + +import org.apache.jena.sparql.core.Var; + +public interface HasPartitionVars { + List getPartitionVars(); +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/Relation.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/Relation.java index 2e96815c5..f61347c21 100644 --- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/Relation.java +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/Relation.java @@ -23,18 +23,20 @@ * @author raven Mar 7, 2018 * */ -public interface Relation { +public interface Relation + extends HasElement +{ /** - * Return the distinguished variables of the relation + * Return the distinguished variables of the relation. + * The returned list is should be duplicate-free. + * The variables are NOT required to occur in the relation's element. * - * @return + * @return A list of variables */ List getVars(); - // Set getFixedVars(); - Element getElement(); /** * Return the set of mentioned variables without the distinguished ones @@ -65,6 +67,19 @@ default Relation rename(Function renameFn, Var ... constantVars) return result; } + /** + * Rename the variables of the relation to the given variables + * In case of clashes, prior variables will be replaced with fresh ones. + * Delegates ot {@link RelationUtils#rename(Relation, List)}. 
+ * + * @param r + * @param targetVars + * @return + */ + default Relation rename(List targetVars) { + return RelationUtils.rename(this, targetVars); + } + default UnaryRelation toUnaryRelation() { List vars = getVars(); UnaryRelation result; @@ -208,6 +223,8 @@ default List getElements() { return ElementUtils.toElementList(getElement()); } + + // public static TernaryRelation from(Triple t) { // new TernaryRelationImpl(ElementUtils.createElement(t), t.getSubject(), t.getPredicate(), t.getObject()) // } diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/RelationUtils.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/RelationUtils.java index 46fec8203..7eb5c5648 100644 --- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/RelationUtils.java +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/RelationUtils.java @@ -47,158 +47,158 @@ import com.google.common.collect.Sets; public class RelationUtils { - public static final TernaryRelation SPO = new TernaryRelationImpl( - ElementUtils.createElementTriple(Vars.s, Vars.p, Vars.o), - Vars.s, Vars.p, Vars.o); - - - /** - * Rename the variables of the relation to the given variables - * In case of clashes, prior variables will be replaced with fresh ones. - * - * @param r - * @param targetVars - * @return - */ - public static Relation rename(Relation r, List targetVars) { - List rVars = r.getVars(); - Map map = createRenameVarMap(r.getVarsMentioned(), rVars, targetVars); - - Relation result = r.applyNodeTransform(new NodeTransformSubst(map)); - - return result; - } - - - /** - * Rename the vars of the relation to the given target variables. - * Thereby take care of conflicts when the target variable also is also mentioned in the relation - * The implementation uses Relation.join() which treats the variables of the left-hand side - * of the join as fixed. - * - * - * @param r - * @param targetNodes A list of vars (TODO Change type to var) - * @return - */ - public static Element renameNodes(Relation r, List targetNodes) { - List tgtVars = targetNodes.stream().map(v -> (Var)v).collect(Collectors.toList()); - - // Create a relation with an empty pattern from the target nodes - Relation joined = new RelationImpl(new ElementGroup(), tgtVars) - .joinOn(tgtVars) - .with(r); - Element result = joined.getElement(); - + public static final TernaryRelation SPO = new TernaryRelationImpl( + ElementUtils.createElementTriple(Vars.s, Vars.p, Vars.o), + Vars.s, Vars.p, Vars.o); + + + /** + * Rename the variables of the relation to the given variables + * In case of clashes, prior variables will be replaced with fresh ones. + * + * @param r + * @param targetVars + * @return + */ + public static Relation rename(Relation r, List targetVars) { + List rVars = r.getVars(); + Map map = createRenameVarMap(r.getVarsMentioned(), rVars, targetVars); + + Relation result = r.applyNodeTransform(new NodeTransformSubst(map)); + + return result; + } + + + /** + * Rename the vars of the relation to the given target variables. + * Thereby take care of conflicts when the target variable also is also mentioned in the relation + * The implementation uses Relation.join() which treats the variables of the left-hand side + * of the join as fixed. 
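+ *
+ * A sketch of the effect (using the {@code SPO} relation defined above):
+ * <pre>
+ * // Yields an element equivalent to the triple pattern ?x ?y ?z
+ * Element e = RelationUtils.renameNodes(RelationUtils.SPO, Arrays.asList(Vars.x, Vars.y, Vars.z));
+ * </pre>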
+ * + * + * @param r + * @param targetNodes A list of vars (TODO Change type to var) + * @return + */ + public static Element renameNodes(Relation r, List targetNodes) { + List tgtVars = targetNodes.stream().map(v -> (Var)v).collect(Collectors.toList()); + + // Create a relation with an empty pattern from the target nodes + Relation joined = new RelationImpl(new ElementGroup(), tgtVars) + .joinOn(tgtVars) + .with(r); + Element result = joined.getElement(); + // if(false) { // List rVars = r.getVars(); // Element e = r.getElement(); // Map map = createRenameVarMap(r.getVarsMentioned(), rVars, targetNodes); -// +// // Element result = ElementUtils.applyNodeTransform(e, new NodeTransformSubst(map)); // } - return result; - } - - public static Map createRenameVarMap(Set mentionedVars, List rVars, List targetNodes) { - //Set rVars = ElementUtils.getMentionedVars(e); - - Set relationVars = new LinkedHashSet<>(rVars); - Set vs = new LinkedHashSet<>(targetNodes); - if(vs.size() != relationVars.size()) { - throw new IllegalArgumentException("Number of distinct variables of the relation must match the number of distinct target variables"); - } - - Map rename = Streams.zip( - relationVars.stream(), - vs.stream(), - (a, b) -> new SimpleEntry<>(a, b)) - .collect(Collectors.toMap(Entry::getKey, Entry::getValue)); - - - // Extend the map by renaming all remaining variables - //Set mentionedVars = ElementUtils.getMentionedVars(e); //r.getVarsMentioned(); - Set remainingVars = Sets.difference(mentionedVars, relationVars); - - //Set forbiddenVars = Sets.union(vs, mentionedVars); - Generator varGen = VarGeneratorBlacklist.create(remainingVars); - - Set targetVars = targetNodes.stream().filter(Node::isVariable).map(x -> (Var)x).collect(Collectors.toSet()); - // targetVars - Map map = VarUtils.createDistinctVarMap(targetVars, remainingVars, true, varGen); - //map.putAll(rename); - rename.putAll(map); - - return rename; - } - - - /** - * Rename variables of all relations to the given list of variables - * All relations and the list of given variables must have the same length - * - * @param relations - * @return - */ - public static Relation align(Collection relations, List vars) { - List tmp = relations.stream() - .map(r -> rename(r, vars)) - .collect(Collectors.toList()); - - List es = tmp.stream() - .map(Relation::getElement) - .collect(Collectors.toList()); - - - Element e = ElementUtils.unionIfNeeded(es); - Relation result = new RelationImpl(e, vars); - return result; - } - - - /** - * Apply groupBy and count(Distinct ?var) to one of a relation's variables. - * - * @param r - * @param aggVar - * @param resultVar - * @param includeAbsent if true, unbound values count too - * @return - */ - public static Relation groupBy(Relation r, Var aggVar, Var resultVar, boolean includeAbsent) { - Query query = new Query(); - query.setQuerySelectType(); - query.setQueryPattern(r.getElement()); - - ExprVar ev = new ExprVar(aggVar); - - Expr e = includeAbsent - ? 
new E_Conditional(new E_Bound(ev), ev, NodeValueUtils.NV_ABSENT) - : ev; - Expr tmp = query.allocAggregate(new AggCountVarDistinct(e)); - - List vars = r.getVars(); - - // Add all other vars as group vars - List groupVars = vars.stream() - .filter(v -> !aggVar.equals(v)) - .collect(Collectors.toList()); - - query.addProjectVars(groupVars); - query.getProject().add(resultVar, tmp); - - List newVars = new ArrayList<>(groupVars); - newVars.add(resultVar); - - for(Var groupVar : groupVars) { - query.addGroupBy(groupVar); - } - - Relation result = new RelationImpl(new ElementSubQuery(query), newVars); - return result; - } - - + return result; + } + + public static Map createRenameVarMap(Set mentionedVars, List rVars, List targetNodes) { + //Set rVars = ElementUtils.getMentionedVars(e); + + Set relationVars = new LinkedHashSet<>(rVars); + Set vs = new LinkedHashSet<>(targetNodes); + if(vs.size() != relationVars.size()) { + throw new IllegalArgumentException("Number of distinct variables of the relation must match the number of distinct target variables"); + } + + Map rename = Streams.zip( + relationVars.stream(), + vs.stream(), + (a, b) -> new SimpleEntry<>(a, b)) + .collect(Collectors.toMap(Entry::getKey, Entry::getValue)); + + + // Extend the map by renaming all remaining variables + //Set mentionedVars = ElementUtils.getMentionedVars(e); //r.getVarsMentioned(); + Set remainingVars = Sets.difference(mentionedVars, relationVars); + + //Set forbiddenVars = Sets.union(vs, mentionedVars); + Generator varGen = VarGeneratorBlacklist.create(remainingVars); + + Set targetVars = targetNodes.stream().filter(Node::isVariable).map(x -> (Var)x).collect(Collectors.toSet()); + // targetVars + Map map = VarUtils.createDistinctVarMap(targetVars, remainingVars, true, varGen); + //map.putAll(rename); + rename.putAll(map); + + return rename; + } + + + /** + * Rename variables of all relations to the given list of variables + * All relations and the list of given variables must have the same length + * + * @param relations + * @return + */ + public static Relation align(Collection relations, List vars) { + List tmp = relations.stream() + .map(r -> rename(r, vars)) + .collect(Collectors.toList()); + + List es = tmp.stream() + .map(Relation::getElement) + .collect(Collectors.toList()); + + + Element e = ElementUtils.unionIfNeeded(es); + Relation result = new RelationImpl(e, vars); + return result; + } + + + /** + * Apply groupBy and count(Distinct ?var) to one of a relation's variables. + * + * @param r + * @param aggVar + * @param resultVar + * @param includeAbsent if true, unbound values count too + * @return + */ + public static Relation groupBy(Relation r, Var aggVar, Var resultVar, boolean includeAbsent) { + Query query = new Query(); + query.setQuerySelectType(); + query.setQueryPattern(r.getElement()); + + ExprVar ev = new ExprVar(aggVar); + + Expr e = includeAbsent + ? 
new E_Conditional(new E_Bound(ev), ev, NodeValueUtils.NV_ABSENT) + : ev; + Expr tmp = query.allocAggregate(new AggCountVarDistinct(e)); + + List vars = r.getVars(); + + // Add all other vars as group vars + List groupVars = vars.stream() + .filter(v -> !aggVar.equals(v)) + .collect(Collectors.toList()); + + query.addProjectVars(groupVars); + query.getProject().add(resultVar, tmp); + + List newVars = new ArrayList<>(groupVars); + newVars.add(resultVar); + + for(Var groupVar : groupVars) { + query.addGroupBy(groupVar); + } + + Relation result = new RelationImpl(new ElementSubQuery(query), newVars); + return result; + } + + // public static Relation createRelationRenamed(Relation prototype, Relation target) { // RelationUtils.create // @@ -212,41 +212,41 @@ public static Relation groupBy(Relation r, Var aggVar, Var resultVar, boolean in // // } - public static Relation fromQuery(String queryStr) { - return fromQuery(queryStr, PrefixMapping.Extended); - } + public static Relation fromQuery(String queryStr) { + return fromQuery(queryStr, PrefixMapping.Extended); + } - public static Relation fromQuery(String queryStr, PrefixMapping prefixMapping) { + public static Relation fromQuery(String queryStr, PrefixMapping prefixMapping) { Query query = new Query(); query.setPrefixMapping(prefixMapping); // TODO Make parser configurable SPARQLParser parser = new ParserSPARQL11(); parser.parse(query, queryStr); - Relation result = fromQuery(query); - return result; - } - - public static Relation fromQuery(Query query) { - Relation result; - if(query.isSelectType()) { - List vars = query.getProjectVars(); - Element element = query.getQueryPattern(); - result = new RelationImpl(element, vars); - } else if(query.isConstructType()) { - Template template = query.getConstructTemplate(); - List vars = new ArrayList<>(QuadPatternUtils.getVarsMentioned(template.getQuads())); - Element element = query.getQueryPattern(); - result = new RelationImpl(element, vars); - } else { - - throw new RuntimeException("SELECT o CONSTRUCT query form expected, instead got " + query); - } - - return result; - } - - + Relation result = fromQuery(query); + return result; + } + + public static Relation fromQuery(Query query) { + Relation result; + if(query.isSelectType()) { + List vars = query.getProjectVars(); + Element element = query.getQueryPattern(); + result = new RelationImpl(element, vars); + } else if(query.isConstructType()) { + Template template = query.getConstructTemplate(); + List vars = new ArrayList<>(QuadPatternUtils.getVarsMentioned(template.getQuads())); + Element element = query.getQueryPattern(); + result = new RelationImpl(element, vars); + } else { + + throw new RuntimeException("SELECT o CONSTRUCT query form expected, instead got " + query); + } + + return result; + } + + public static Triple extractTriple(BinaryRelation relation) { Element e = relation.getElement(); @@ -258,76 +258,76 @@ public static Triple extractTriple(BinaryRelation relation) { // Relation result = addUnionMember(a, b, false); // return result; // } - + // public static Relation concat(Relation a, Relation b, boolean transformInPlaceIfApplicable) { -// +// // } - + public static BinaryRelation and(BinaryRelation a, BinaryRelation b, boolean transformInPlaceIfApplicable) { - Element ae = a.getElement(); - Element be = b.getElement(); - - Collection vas = PatternVars.vars(ae); - Collection vbs = PatternVars.vars(be); - Map varMap = VarUtils.createDistinctVarMap(vas, vbs, true, null); - - varMap.put(b.getSourceVar(), a.getTargetVar()); - 
Element ce = ElementUtils.createRenamedElement(be, varMap); - - ElementGroup eg; - boolean isInPlace = ae instanceof ElementGroup && transformInPlaceIfApplicable; - if(isInPlace) { - eg = (ElementGroup)ae; - } else { - eg = new ElementGroup(); - eg.addElement(ae); - } - eg.addElement(ce);; - - BinaryRelation result = new BinaryRelationImpl(eg, a.getSourceVar(), varMap.getOrDefault(b.getTargetVar(), a.getSourceVar())); - - return result; + Element ae = a.getElement(); + Element be = b.getElement(); + + Collection vas = PatternVars.vars(ae); + Collection vbs = PatternVars.vars(be); + Map varMap = VarUtils.createDistinctVarMap(vas, vbs, true, null); + + varMap.put(b.getSourceVar(), a.getTargetVar()); + Element ce = ElementUtils.createRenamedElement(be, varMap); + + ElementGroup eg; + boolean isInPlace = ae instanceof ElementGroup && transformInPlaceIfApplicable; + if(isInPlace) { + eg = (ElementGroup)ae; + } else { + eg = new ElementGroup(); + eg.addElement(ae); + } + eg.addElement(ce);; + + BinaryRelation result = new BinaryRelationImpl(eg, a.getSourceVar(), varMap.getOrDefault(b.getTargetVar(), a.getSourceVar())); + + return result; } - - + + /** - * - * + * + * * @param a * @param b * @param transformInPlaceIfApplicable Add 'b' to to 'a' if a's element already is a union * @return */ public static BinaryRelation union(BinaryRelation a, BinaryRelation b, boolean transformInPlaceIfApplicable) { - Element ae = a.getElement(); - - ElementUnion u; - boolean isInPlace; - if(transformInPlaceIfApplicable && a.getElement() instanceof ElementUnion) { - u = (ElementUnion)ae; - isInPlace = true; - } else { - u = new ElementUnion(); - u.addElement(a.getElement()); - isInPlace = false; - } - - - Map varMap = new HashMap<>(); - - Collection vas = PatternVars.vars(a.getElement()); - Collection vbs = PatternVars.vars(b.getElement()); - VarUtils.createDistinctVarMap(vbs, vas, true, null); - - varMap.put(b.getSourceVar(), a.getSourceVar()); - varMap.put(b.getTargetVar(), a.getTargetVar()); - Element c = ElementUtils.createRenamedElement(b.getElement(), varMap); - u.addElement(c); - - BinaryRelation result = isInPlace ? a : new BinaryRelationImpl(u, a.getSourceVar(), a.getTargetVar()); - return result; + Element ae = a.getElement(); + + ElementUnion u; + boolean isInPlace; + if(transformInPlaceIfApplicable && a.getElement() instanceof ElementUnion) { + u = (ElementUnion)ae; + isInPlace = true; + } else { + u = new ElementUnion(); + u.addElement(a.getElement()); + isInPlace = false; + } + + + Map varMap = new HashMap<>(); + + Collection vas = PatternVars.vars(a.getElement()); + Collection vbs = PatternVars.vars(b.getElement()); + VarUtils.createDistinctVarMap(vbs, vas, true, null); + + varMap.put(b.getSourceVar(), a.getSourceVar()); + varMap.put(b.getTargetVar(), a.getTargetVar()); + Element c = ElementUtils.createRenamedElement(b.getElement(), varMap); + u.addElement(c); + + BinaryRelation result = isInPlace ? a : new BinaryRelationImpl(u, a.getSourceVar(), a.getTargetVar()); + return result; } - + public static BinaryRelation createRelation(String propertyUri, boolean isInverse, PrefixMapping prefixMapping) { String p = prefixMapping == null ? propertyUri : prefixMapping.expandPrefix(propertyUri); @@ -338,15 +338,15 @@ public static BinaryRelation createRelation(String propertyUri, boolean isInvers public static BinaryRelation createRelation(Node property, boolean isInverse) { - + //Expr expr = new E_Equals(new ExprVar(Vars.p), ExprUtils.nodeToExpr(property)); - - Triple t = isInverse - ? 
new Triple(Vars.o, property, Vars.s) - : new Triple(Vars.s, property, Vars.o); - - Element element = ElementUtils.createElement(t); - //Element element = new ElementTriplesBlock(bgp); + + Triple t = isInverse + ? new Triple(Vars.o, property, Vars.s) + : new Triple(Vars.s, property, Vars.o); + + Element element = ElementUtils.createElement(t); + //Element element = new ElementTriplesBlock(bgp); BinaryRelation result = new BinaryRelationImpl(element, Vars.s, Vars.o);//createRelation(expr, isInverse); return result; } @@ -370,29 +370,29 @@ public static BinaryRelation createRelation(Expr expr, boolean isInverse) { public static Query createQuery(Relation relation) { - // If the element is already a query, just limit the projection + // If the element is already a query, just limit the projection Element e = relation.getElement(); List vars = relation.getVars(); Query result; if(e instanceof ElementSubQuery) { - result = ((ElementSubQuery)e).getQuery().cloneQuery(); - - // Update the projection - Set removals = new HashSet<>(result.getProject().getVars()); - removals.removeAll(vars); - - VarExprList project = result.getProject(); - removals.forEach(project::remove); - + result = ((ElementSubQuery)e).getQuery().cloneQuery(); + + // Update the projection + Set removals = new HashSet<>(result.getProject().getVars()); + removals.removeAll(vars); + + VarExprList project = result.getProject(); + removals.forEach(project::remove); + } else { - result = new Query(); - result.setQuerySelectType(); - - result.setQueryPattern(e); - - VarExprList project = result.getProject(); - vars.forEach(project::add); + result = new Query(); + result.setQuerySelectType(); + + result.setQueryPattern(e); + + VarExprList project = result.getProject(); + vars.forEach(project::add); } return result; diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/UnaryRelation.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/UnaryRelation.java index b1c5d9633..9930bec59 100644 --- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/UnaryRelation.java +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/concepts/UnaryRelation.java @@ -19,6 +19,12 @@ default List getVars() { } + /** + * Test whether this relation is isomorphic to + * {@code ?s WHERE { ?s ?p ?o }} + * + * @return + */ default boolean isSubjectConcept() { return ConceptUtils.isSubjectConcept(this); } diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/AccBestLiteral.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/AccBestLiteral.java new file mode 100644 index 000000000..45d7ead70 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/AccBestLiteral.java @@ -0,0 +1,104 @@ +package org.aksw.jena_sparql_api.mapper; + +import java.util.Iterator; +import java.util.List; +import java.util.function.BiPredicate; +import java.util.stream.IntStream; + +import org.aksw.jena_sparql_api.utils.NodeUtils; +import org.apache.jena.graph.Node; +import org.apache.jena.sparql.engine.binding.Binding; +import org.apache.jena.sparql.expr.NodeValue; +import org.apache.jena.sparql.expr.aggregate.Accumulator; +import org.apache.jena.sparql.function.FunctionEnv; + +// FIXME Take LiteralPreference.preferProperties into account +public class AccBestLiteral + implements Accumulator +{ + protected BestLiteralConfig bestLiteralConfig; + protected Node bestMatchNode; + protected int[] bestMatchScore; + + 
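// The score vector ranks the current best label: index 0 is the predicate's position
+    // in the configured predicate list, index 1 the language tag's position in the
+    // language list. Lower positions win; the components are compared lexicographically.
+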
public AccBestLiteral(BestLiteralConfig bestLiteralConfig) { + this.bestLiteralConfig = bestLiteralConfig; + + this.bestMatchNode = null; + + // Scores for predicate and language; lower means better match + this.bestMatchScore = new int[] {Integer.MAX_VALUE, Integer.MAX_VALUE}; + } + + @Override + public void accumulate(Binding binding, FunctionEnv functionEnv) { + + // Evaluate label, property and subject based on the binding + + Node subject = binding.get(bestLiteralConfig.getSubjectVar()); + Node property = binding.get(bestLiteralConfig.getPredicateVar()); + Node label = binding.get(bestLiteralConfig.getObjectVar()); + + List predicates = bestLiteralConfig.getPredicates(); + List langs = bestLiteralConfig.getLangs(); + + if(this.bestMatchNode == null) { + this.bestMatchNode = subject; + } + + String candidateLang = NodeUtils.getLang(label); + + // Determine the score vector for the property and the language + int propertyScore = predicates == null ? 0 : predicates.indexOf(property); + int langScore = langs == null ? 0 : langs.indexOf(candidateLang); + + int[] score = new int[] {propertyScore, langScore}; + + boolean allNonNegative = IntStream.of(score).allMatch(item -> item >= 0); + + if (allNonNegative) { + // Check if the new score is better (less than) than the current best match + boolean isBetterMatch = AccBestLiteral.compareIterators( + IntStream.of(score).iterator(), + IntStream.of(bestMatchScore).iterator(), (x, y) -> x < y); + + if (isBetterMatch) { + bestMatchScore = score; + bestMatchNode = label; + } + } + } + + @Override + public NodeValue getValue() { + return bestMatchNode == null ? null : NodeValue.makeNode(bestMatchNode); + } + + + public static boolean compareIterators(Iterator as, Iterator bs, BiPredicate op) { + boolean result = false; + + while (as.hasNext() && bs.hasNext()) { + T a = as.next(); + T b = bs.next(); + + if (op.test(a, b)) { + if (op.test(b, a)) { + continue; + } + + result = true; + break; + } else { //else if(op(b, a)) { + if (!op.test(b, a)) { + continue; + } + + result = false; + break; + } + } + + return result; + }; + +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/AccList.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/AccList.java index 9997039ad..d64b5c966 100644 --- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/AccList.java +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/AccList.java @@ -5,12 +5,13 @@ import org.apache.jena.sparql.engine.binding.Binding; + /** * An accumulator similar to that of Jena, however it uses a generic for the * value. 
- * + * * @author raven - * + * * @param */ public class AccList implements Acc> { diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/BestLiteralConfig.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/BestLiteralConfig.java new file mode 100644 index 000000000..30c39e2d8 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/BestLiteralConfig.java @@ -0,0 +1,84 @@ +package org.aksw.jena_sparql_api.mapper; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +import org.aksw.jena_sparql_api.utils.Vars; +import org.apache.jena.graph.Node; +import org.apache.jena.rdf.model.Property; +import org.apache.jena.sparql.core.Var; + +public class BestLiteralConfig { + + protected LiteralPreference literalPreference; + protected Var subjectVar; + protected Var predicateVar; + protected Var objectVar; + + public BestLiteralConfig(LiteralPreference literalPreference) { + this(literalPreference, Vars.s, Vars.p, Vars.o); + } + + public BestLiteralConfig( + LiteralPreference literalPreference, + Var subjectVar, + Var predicateVar, + Var objectVar) { + this.literalPreference = literalPreference; + this.subjectVar = subjectVar; + this.predicateVar = predicateVar; + this.objectVar = objectVar; + } + + public LiteralPreference getLiteralPreference() { + return literalPreference; + } + + public Var getSubjectVar() { + return subjectVar; + } + + public Var getPredicateVar() { + return predicateVar; + } + + public Var getObjectVar() { + return objectVar; + } + + /** + * Convenience method + * + */ + public List getLangs() { + List result = this.literalPreference.getLangs(); + return result; + } + + /** + * Convenience method + * + */ + public List getPredicates() { + List result = this.literalPreference.getPredicates(); + return result; + } + + public static BestLiteralConfig fromProperty(Property property) { + BestLiteralConfig result = new BestLiteralConfig(new LiteralPreference( + null, Collections.singletonList(property.asNode()), false)); + return result; + } + + @Override + public String toString() { + String result = Arrays.asList( + "BestLabelConfig", getLangs(), getPredicates(), + getSubjectVar(), getPredicateVar(), getObjectVar()) + .stream().map(Objects::toString).collect(Collectors.joining(", ")); + return result; + } +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/LabelUtils.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/LabelUtils.java new file mode 100644 index 000000000..34d53ad23 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/LabelUtils.java @@ -0,0 +1,62 @@ +package org.aksw.jena_sparql_api.mapper; + +import java.util.List; +import java.util.stream.Collectors; + +import org.aksw.jena_sparql_api.concepts.TernaryRelation; +import org.aksw.jena_sparql_api.concepts.TernaryRelationImpl; +import org.aksw.jena_sparql_api.utils.ExprUtils; +import org.aksw.jena_sparql_api.utils.Vars; +import org.apache.jena.graph.Node; +import org.apache.jena.graph.Triple; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.expr.E_Lang; +import org.apache.jena.sparql.expr.E_LangMatches; +import org.apache.jena.sparql.expr.Expr; +import org.apache.jena.sparql.expr.ExprVar; +import org.apache.jena.sparql.expr.NodeValue; +import org.apache.jena.sparql.syntax.ElementFilter; +import 
org.apache.jena.sparql.syntax.ElementGroup; + +public class LabelUtils { + + public static TernaryRelation createRelationLiteralPreference(LiteralPreference literalPreference) { + BestLiteralConfig blc = new BestLiteralConfig(literalPreference, Vars.x, Vars.y, Vars.z); + TernaryRelation result = createRelationPrefLabels(blc); + return result; + } + + + public static TernaryRelation createRelationPrefLabels(BestLiteralConfig bestLiteralConfig) { + + List prefLangs = bestLiteralConfig.getLangs(); + List prefPreds = bestLiteralConfig.getPredicates(); + + Var s = bestLiteralConfig.getSubjectVar(); + Var p = bestLiteralConfig.getPredicateVar(); + Var o = bestLiteralConfig.getObjectVar(); + + Expr labelExpr = new ExprVar(o); + + // Second, create the element + List langTmp = prefLangs.stream().map(lang -> { + Expr r = new E_LangMatches(new E_Lang(labelExpr), NodeValue.makeString(lang)); + return r; + }).collect(Collectors.toList()); + + // Combine multiple expressions into a single logicalOr expression. + Expr langConstraint = ExprUtils.orifyBalanced(langTmp); + Expr propFilter = ExprUtils.oneOf(p, prefPreds); + + ElementGroup els = new ElementGroup(); + els.addTriplePattern(new Triple(s, p, o)); + els.addElementFilter(new ElementFilter(propFilter)); + els.addElementFilter(new ElementFilter(langConstraint)); + + //var result = new Concept(langElement, s); + TernaryRelation result = new TernaryRelationImpl(els, s, p, o); + return result; + } + +} + diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/LiteralPreference.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/LiteralPreference.java new file mode 100644 index 000000000..69143cc38 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/LiteralPreference.java @@ -0,0 +1,42 @@ +package org.aksw.jena_sparql_api.mapper; + +import java.util.List; + +import org.apache.jena.graph.Node; + +/** + * Configuration object that serves as the base for choosing the best rdf term in object position + * from a set of triples. 
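+ *
+ * For example, a preference for skos:prefLabel over rdfs:label and for English
+ * over German labels could be expressed as follows (a sketch):
+ * <pre>
+ * new LiteralPreference(
+ *     Arrays.asList("en", "de"),
+ *     Arrays.asList(SKOS.prefLabel.asNode(), RDFS.label.asNode()),
+ *     false);
+ * </pre>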
+ * + * TODO Add a flag to also match IRIs + * + * @author raven + * + */ +public class LiteralPreference { + protected List langs; + protected List predicates; + protected boolean preferProperties = false; + + public LiteralPreference( + List langs, + List predicates, + boolean preferProperties) { + super(); + this.langs = langs; + this.predicates = predicates; + this.preferProperties = preferProperties; + } + + public List getLangs() { + return langs; + } + + public List getPredicates() { + return predicates; + } + + public boolean isPreferProperties() { + return preferProperties; + } +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/MappedQuery.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/MappedQuery.java index d2bbbb855..5ce6e00b6 100644 --- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/MappedQuery.java +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/MappedQuery.java @@ -23,7 +23,7 @@ public Agg getAgg() { } public static MappedQuery create(Query query, Var partitionVar, Agg agg) { - MappedQuery result = new MappedQuery(new PartitionedQuery1(query, partitionVar), agg); + MappedQuery result = new MappedQuery(new PartitionedQuery1Impl(query, partitionVar), agg); return result; } diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/MappedQueryUtils.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/MappedQueryUtils.java index 2c3bf6721..b75c3a758 100644 --- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/MappedQueryUtils.java +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/MappedQueryUtils.java @@ -3,8 +3,6 @@ import java.util.Set; import org.aksw.jena_sparql_api.utils.QuadPatternUtils; -import org.aksw.jena_sparql_api.utils.Vars; - import org.apache.jena.query.Query; import org.apache.jena.sparql.core.DatasetGraph; import org.apache.jena.sparql.core.Quad; @@ -16,7 +14,7 @@ public class MappedQueryUtils { public static MappedQuery fromConstructQuery(Query query, Var partitionVar) { - PartitionedQuery1 partQuery = new PartitionedQuery1(query, partitionVar); + PartitionedQuery1 partQuery = new PartitionedQuery1Impl(query, partitionVar); MappedQuery result = fromConstructQuery(partQuery); return result; } @@ -53,7 +51,7 @@ public static MappedQuery fromConstructQuery(PartitionedQuery1 par // qp.add(new Quad(Quad.defaultGraphNodeGenerated, Vars.s, Vars.p, Vars.o)); // Agg agg = AggDatasetGraph.create(qp); - PartitionedQuery1 pq = new PartitionedQuery1(query, partVar); + PartitionedQuery1 pq = new PartitionedQuery1Impl(query, partVar); result = new MappedQuery(pq, agg); diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQuery.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQuery.java new file mode 100644 index 000000000..642f42078 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQuery.java @@ -0,0 +1,62 @@ +package org.aksw.jena_sparql_api.mapper; + +import java.util.Map; + +import org.aksw.jena_sparql_api.concepts.Relation; +import org.apache.jena.graph.Node; +import org.apache.jena.query.Query; +import org.apache.jena.sparql.algebra.Table; +import org.apache.jena.sparql.engine.binding.Binding; +import org.apache.jena.sparql.expr.ExprList; +import org.apache.jena.sparql.syntax.Element; +import org.apache.jena.sparql.syntax.Template; + 
+/** + * An object (graph) query is an extension of a SPARQL construct query: + * Blank nodes in the query template can additionally be mapped to sequences + * of SPARQL expressions. This way blank nodes can be assigned a client-side mapping + * that allocate specific ID thus enabling deterministic template instantiation without + * the need of having the ID generation part of the SPARQL query. + * + * Yet, if desired, the use of SPARQL expressions enables query rewriting + * that pushes the ID generation into a conventional SPARQL query as illustrated below: + * + *
+ * CONSTRUCT {
+ *   ?bnode a :FacetValueCount ; :facet ?f ; :value ?v ; :count ?c
+ * } WHERE {
+ *   { SELECT ?f ?v ?c { ... } }
+ *   BIND(sequence(?f, ?v) AS ?bnode)
+ * }
+ * 
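+ *
+ * Alternatively, the mapping can be kept client-side via {@link #getIdMapping()}
+ * (a sketch; {@code template}, {@code relation}, {@code bnode}, {@code f} and {@code v}
+ * are assumed to be built elsewhere):
+ * <pre>
+ * ObjectQuery oq = new ObjectQueryImpl(template, relation);
+ * ExprList idExprs = new ExprList();
+ * idExprs.add(new ExprVar(f));
+ * idExprs.add(new ExprVar(v));
+ * oq.getIdMapping().put(bnode, idExprs);
+ * </pre>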
+ * + * + * @author raven + * + */ +public interface ObjectQuery { + /** + * The template is the same as for SPARQL contstruct queries + * + * @return The template + */ + Template getTemplate(); + + /** + * The mapping of blank nodes in the template to expression lists. + * Only blank nodes should be mapped. + * Any expression in the ExprList should only make use of variables that + * are distinguished variables of the relation + * + * @return The mapping. May be empty but never null. + */ + Map getIdMapping(); + + /** + * Relation is a uniform representation for something that is or can be evaluated + * to a set of {@link Binding}s. + * Concretely, a relation can be (SELECT) {@link Query}, {@link Element} or {@link Table}. + * + * @return The relation that acts as the source of bindings for this object query + */ + Relation getRelation(); +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryBase.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryBase.java new file mode 100644 index 000000000..5cf553a72 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryBase.java @@ -0,0 +1,22 @@ +package org.aksw.jena_sparql_api.mapper; + +import java.util.Map; + +import org.apache.jena.graph.Node; +import org.apache.jena.sparql.expr.ExprList; + +public abstract class ObjectQueryBase + implements ObjectQuery +{ + protected Map idMapping; + + public ObjectQueryBase(Map idMapping) { + super(); + this.idMapping = idMapping; + } + + @Override + public Map getIdMapping() { + return idMapping; + } +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryFromQuery.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryFromQuery.java new file mode 100644 index 000000000..b864aab46 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryFromQuery.java @@ -0,0 +1,39 @@ +package org.aksw.jena_sparql_api.mapper; + +import java.util.HashMap; +import java.util.Map; + +import org.aksw.jena_sparql_api.concepts.Relation; +import org.aksw.jena_sparql_api.concepts.RelationUtils; +import org.apache.jena.graph.Node; +import org.apache.jena.query.Query; +import org.apache.jena.sparql.expr.ExprList; +import org.apache.jena.sparql.syntax.Template; + +public class ObjectQueryFromQuery + extends ObjectQueryBase +{ + protected Query query; + + public ObjectQueryFromQuery(Query query) { + this(query, new HashMap<>()); + } + + public ObjectQueryFromQuery(Query query, Map idMapping) { + super(idMapping); + this.query = query; + } + + @Override + public Template getTemplate() { + return query.getConstructTemplate(); + } + + @Override + public Relation getRelation() { + // return RelationUtils.fromQuery(query); + Query asSelect = query.cloneQuery(); + asSelect.setQuerySelectType(); + return RelationUtils.fromQuery(asSelect); + } +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryImpl.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryImpl.java new file mode 100644 index 000000000..73f640bd9 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/ObjectQueryImpl.java @@ -0,0 +1,48 @@ +package org.aksw.jena_sparql_api.mapper; + +import java.util.HashMap; +import java.util.Map; + +import org.aksw.jena_sparql_api.concepts.Relation; +import org.apache.jena.graph.Node; 
+import org.apache.jena.sparql.expr.ExprList; +import org.apache.jena.sparql.syntax.Template; + +public class ObjectQueryImpl + extends ObjectQueryBase +{ + protected Template template; + protected Relation relation; + + public ObjectQueryImpl(Template template, Relation relation) { + this(template, relation, new HashMap<>()); + } + + public ObjectQueryImpl(Template template, Relation relation, Map idMapping) { + super(idMapping); + this.template = template; + this.relation = relation; + } + + public Template getTemplate() { + return template; + } + + public void setTemplate(Template template) { + this.template = template; + } + + public Relation getRelation() { + return relation; + } + + public void setRelation(Relation relation) { + this.relation = relation; + } + + @Override + public String toString() { + return "ObjectQueryImpl [template=" + template + ", relation=" + relation + ", idMapping=" + idMapping + "]"; + } +} + diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery.java index c716380da..ddcdc7b45 100644 --- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery.java +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery.java @@ -2,10 +2,47 @@ import java.util.List; +import org.aksw.jena_sparql_api.concepts.HasElement; import org.apache.jena.query.Query; import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.syntax.Element; +import org.apache.jena.sparql.syntax.Template; -public interface PartitionedQuery { +/** + * A query where a subset of the mentioned vars are used to partition + * its result set. The partitioned result set can then serve as the + * basis for accumulating objects from it - i.e. each partition is the base for an object + * (which may be a set of objects). + * + * TODO Add a feature that allows specification of mappings of blank nodes in construct templates + * to expressions that remap them to local IDs; i.e. Map nodeToIdExpr + * Conceptually this substitutes bnodes with variables that are bound to an expression + * CONSTRUCT { ?bnode1 a Foo } { ... ?x . BIND(fn(?x) AS ?bnode1 } + * This way bnode substitution can be done explicitly in the client + * Actually this is again the Sponate concept where objects are accumulated using aggregation based + * on a JSON template (rather than CONSTRUCT/triples template). Back then we had even support to assemble lists from sparql results. 
+ * + * + * + * @author raven + * + */ +public interface PartitionedQuery + extends HasElement +{ Query getQuery(); List getPartitionVars(); + + default Template getTemplate() { + Query query = getQuery(); + Template result = query.getConstructTemplate(); + return result; + } + + @Override + default Element getElement() { + Query query = getQuery(); + Element result = query.getQueryPattern(); + return result; + } } diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery1.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery1.java index 61c0a8595..32900dae5 100644 --- a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery1.java +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery1.java @@ -3,77 +3,23 @@ import java.util.Collections; import java.util.List; -import org.apache.jena.query.Query; import org.apache.jena.sparql.core.Var; -import org.apache.jena.util.OneToManyMap.Entry; -public class PartitionedQuery1 - implements PartitionedQuery +/** + * Special case of a query partitioned by a single variable + * + * @author raven + * + */ +public interface PartitionedQuery1 + extends PartitionedQuery { - protected Query query; - protected Var partitionVar; - - public PartitionedQuery1(Query query, Var partitionVar) { - super(); - this.query = query; - this.partitionVar = partitionVar; - } - - public Query getQuery() { - return query; - } - - public Var getPartitionVar() { - return partitionVar; - } + Var getPartitionVar(); @Override - public List getPartitionVars() { + default List getPartitionVars() { + Var partitionVar = getPartitionVar(); return Collections.singletonList(partitionVar); } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((query == null) ? 0 : query.hashCode()); - result = prime * result + ((partitionVar == null) ? 
0 : partitionVar.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - PartitionedQuery1 other = (PartitionedQuery1) obj; - if (query == null) { - if (other.query != null) - return false; - } else if (!query.equals(other.query)) - return false; - if (partitionVar == null) { - if (other.partitionVar != null) - return false; - } else if (!partitionVar.equals(other.partitionVar)) - return false; - return true; - } - - @Override - public String toString() { - String result = "" + partitionVar + " | " + query; - return result; - } - -// public static PartitionedQuery1 from(Entry e) { -// return from(e.getKey(), e.getValue()); -// } - - public static PartitionedQuery1 from(Query view, Var viewVar) { - return new PartitionedQuery1(view, viewVar); - } } + diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery1Impl.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery1Impl.java new file mode 100644 index 000000000..e7e0c4420 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/PartitionedQuery1Impl.java @@ -0,0 +1,72 @@ +package org.aksw.jena_sparql_api.mapper; + +import org.apache.jena.query.Query; +import org.apache.jena.sparql.core.Var; + +public class PartitionedQuery1Impl + implements PartitionedQuery1 +{ + protected Query query; + protected Var partitionVar; + + public PartitionedQuery1Impl(Query query, Var partitionVar) { + super(); + this.query = query; + this.partitionVar = partitionVar; + } + + @Override + public Query getQuery() { + return query; + } + + @Override + public Var getPartitionVar() { + return partitionVar; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((query == null) ? 0 : query.hashCode()); + result = prime * result + ((partitionVar == null) ? 
0 : partitionVar.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PartitionedQuery1Impl other = (PartitionedQuery1Impl) obj; + if (query == null) { + if (other.query != null) + return false; + } else if (!query.equals(other.query)) + return false; + if (partitionVar == null) { + if (other.partitionVar != null) + return false; + } else if (!partitionVar.equals(other.partitionVar)) + return false; + return true; + } + + @Override + public String toString() { + String result = "" + partitionVar + " | " + query; + return result; + } + +// public static PartitionedQuery1 from(Entry e) { +// return from(e.getKey(), e.getValue()); +// } + + public static PartitionedQuery1 from(Query view, Var viewVar) { + return new PartitionedQuery1Impl(view, viewVar); + } +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQuery.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQuery.java new file mode 100644 index 000000000..2f976b036 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQuery.java @@ -0,0 +1,66 @@ +package org.aksw.jena_sparql_api.mapper; + +import org.apache.jena.graph.Node; +import org.apache.jena.rdf.model.Resource; + +/** + * A wrapper for a partitioned query where a single node of + * its template is designated as a root node. + * + * This enables construction of {@link Resource} instances even from partitions based on + * multiple partition variables. + * + * For example, facet value counts can be conceptually expressed + * in the following extended SPARQL syntax: + * + *
+ * {@code
+ *   CONSTRUCT { _:b :facet ?f; :facetValue ?fv ; :facetValueCount ?fvc }
+ *     { SELECT ?f ?fv (COUNT(DISTINCT *) AS ?fvc) { ... } GROUP BY ?f ?fv }
+ *   PARTITION BY ?f ?fv
+ *   ROOT _:b
+ * }
+ * 
+ * + * Evaluation of partitioned queries remaps blank nodes of the template only ONCE per partition. + * In this example, for each distinct binding of ?f and ?fv the blank node _:b will be mapped to + * a single fresh blank node for each partition. + * + * + * As a consequence, in the example, the computed count ONLY applies to a certain binding of both ?f and ?fv. + * There does not exist a resource the count can be attached to as a property, though one + * can be allocated using: + *
+ *   BIND(CONCAT('http://...', STR(?f), STR(?fv)) AS ?grouped)
+ * 
+ * However, this allocation is usually not very useful: + *
+ * <ul>
+ *   <li>Query rewriting that filters on ?grouped is inefficient, as it cannot exploit any index.</li>
+ *   <li>As ?grouped is derived from ?f and ?fv, it duplicates their information, which needlessly
+ * increases the result set size and thus lowers the performance of transmitting it.</li>
+ * </ul>
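+ *
+ * A rooted query for the example above might be assembled as follows (a sketch;
+ * {@code template}, {@code relation} and the template's blank node {@code _:b},
+ * available as a {@link Node}, are assumed to be built beforehand):
+ * <pre>
+ * ObjectQuery oq = new ObjectQueryImpl(template, relation);
+ * RootedQuery rq = new RootedQueryImpl(bnode, oq);
+ * </pre>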
+ * + * + * + * @author raven + * + */ +public interface RootedQuery { + /** + * The designated root node. The following rules apply: + *
+ * <ul>
+ *   <li>If {@code getObjectQuery().getTemplate()} is non-empty,
+ * then the root node should be mentioned in the template.</li>
+ *   <li>If the template is empty, then the root node should be a variable
+ * that occurs in {@code getObjectQuery().getRelation().getElement()}.
+ * Furthermore, if in this case the query is partitioned by a single variable, then the root
+ * node should match that variable.</li>
+ * </ul>
+ * + * @return + */ + Node getRootNode(); + // PartitionedQuery getPartitionedQuery(); + ObjectQuery getObjectQuery(); +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQueryFromPartitionedQuery1.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQueryFromPartitionedQuery1.java new file mode 100644 index 000000000..2ed9a2f62 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQueryFromPartitionedQuery1.java @@ -0,0 +1,31 @@ +package org.aksw.jena_sparql_api.mapper; + +import org.apache.jena.graph.Node; + +/** + * Treat the sole partition variable of a {@link PartitionedQuery1} as the root node. + * + * @author raven + * + */ +public class RootedQueryFromPartitionedQuery1 + implements RootedQuery +{ + protected PartitionedQuery1 partitionedQuery1; + + public RootedQueryFromPartitionedQuery1(PartitionedQuery1 partitionedQuery1) { + super(); + this.partitionedQuery1 = partitionedQuery1; + } + + @Override + public Node getRootNode() { + return partitionedQuery1.getPartitionVar(); + } + + @Override + public ObjectQuery getObjectQuery() { + // TODO Have the object query wrapper delegate every call to partitionedQuery.getQuery() + return new ObjectQueryFromQuery(partitionedQuery1.getQuery()); + } +} diff --git a/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQueryImpl.java b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQueryImpl.java new file mode 100644 index 000000000..016d6d5e1 --- /dev/null +++ b/jena-sparql-api-concepts/src/main/java/org/aksw/jena_sparql_api/mapper/RootedQueryImpl.java @@ -0,0 +1,26 @@ +package org.aksw.jena_sparql_api.mapper; + +import org.apache.jena.graph.Node; + +public class RootedQueryImpl + implements RootedQuery +{ + protected Node rootNode; + protected ObjectQuery objectQuery; + + public RootedQueryImpl(Node rootNode, ObjectQuery objectQuery) { + super(); + this.rootNode = rootNode; + this.objectQuery = objectQuery; + } + + @Override + public Node getRootNode() { + return rootNode; + } + + @Override + public ObjectQuery getObjectQuery() { + return objectQuery; + } +} diff --git a/jena-sparql-api-conjure/pom.xml b/jena-sparql-api-conjure/pom.xml index d7c7f7cab..1ee56f105 100644 --- a/jena-sparql-api-conjure/pom.xml +++ b/jena-sparql-api-conjure/pom.xml @@ -12,7 +12,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/dcat/ap/utils/DcatDatasetCreation.java b/jena-sparql-api-conjure/src/main/java/org/aksw/dcat/ap/utils/DcatDatasetCreation.java new file mode 100644 index 000000000..dc10116bc --- /dev/null +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/dcat/ap/utils/DcatDatasetCreation.java @@ -0,0 +1,34 @@ +package org.aksw.dcat.ap.utils; + +import org.aksw.dcat.jena.domain.api.DcatDataset; +import org.aksw.dcat.jena.domain.api.DcatDistribution; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; + +public class DcatDatasetCreation { + /** + * Create a simple DCAT Dataset from a url. + * + * The pattern is as follows: + *
+     * url#dataset
+     *   dcat:distribution url#distribution .
+     *
+     * url#distribution
+     *   dcat:downloadURL url
+     * 
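+     *
+     * Usage (a sketch):
+     * <pre>
+     * DcatDataset ds = DcatDatasetCreation.fromDownloadUrl("http://example.org/data.nt");
+     * </pre>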
+ * + * @param url + * @return + */ + public static DcatDataset fromDownloadUrl(String url) { + Model model = ModelFactory.createDefaultModel(); + DcatDataset result = model.createResource(url + "#dataset").as(DcatDataset.class); + + DcatDistribution dist = model.createResource(url + "#distribution").as(DcatDistribution.class); + result.getDistributions(DcatDistribution.class).add(dist); + dist.setDownloadURL(url); + + return result; + } +} diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/dataset/engine/ExecutionUtils.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/dataset/engine/ExecutionUtils.java index d17ae2ef3..7d00d5db1 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/dataset/engine/ExecutionUtils.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/dataset/engine/ExecutionUtils.java @@ -16,7 +16,7 @@ import org.aksw.dcat.jena.domain.api.DcatDataset; import org.aksw.dcat.jena.domain.api.DcatDistribution; -import org.aksw.dcat.jena.domain.api.MvnEntity; +import org.aksw.dcat.jena.domain.api.MavenEntity; import org.aksw.jena_sparql_api.conjure.algebra.common.ResourceTreeUtils; import org.aksw.jena_sparql_api.conjure.datapod.api.RdfDataPod; import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRef; @@ -69,7 +69,7 @@ public class ExecutionUtils { * @return */ public static String deriveId(Resource r) { - MvnEntity ds = ModelFactory.createDefaultModel().createResource().as(MvnEntity.class); + MavenEntity ds = ModelFactory.createDefaultModel().createResource().as(MavenEntity.class); String mvnId = Arrays.asList(ds.getGroupId(), ds.getArtifactId(), ds.getVersion(), ds.getClassifier()).stream() .filter(Objects::nonNull) .collect(Collectors.joining(":")); @@ -124,7 +124,7 @@ public static RdfDataPod executeJob(Op op) { } catch (IOException e) { throw new RuntimeException(e); } - ResourceStore cacheStore = repo.getCacheStore(); +// ResourceStore cacheStore = repo.getCacheStore(); OpExecutorDefault catalogExecutor = new OpExecutorDefault(repo, TaskContext.empty(), new LinkedHashMap<>(), RDFFormat.TURTLE_PRETTY); RdfDataPod result = op.accept(catalogExecutor); diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRef.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRef.java new file mode 100644 index 000000000..a8cf437d8 --- /dev/null +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRef.java @@ -0,0 +1,5 @@ +package org.aksw.jena_sparql_api.conjure.entityref.core.api; + +public interface EntityRef { + +} diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefIdentifier.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefIdentifier.java new file mode 100644 index 000000000..98c4866dc --- /dev/null +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefIdentifier.java @@ -0,0 +1,5 @@ +package org.aksw.jena_sparql_api.conjure.entityref.core.api; + +public interface EntityRefIdentifier { + +} diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefIri.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefIri.java new file mode 100644 index 
000000000..b97518a63 --- /dev/null +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefIri.java @@ -0,0 +1,5 @@ +package org.aksw.jena_sparql_api.conjure.entityref.core.api; + +public interface EntityRefIri { + +} diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefMaven.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefMaven.java new file mode 100644 index 000000000..9176c7fa5 --- /dev/null +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefMaven.java @@ -0,0 +1,5 @@ +package org.aksw.jena_sparql_api.conjure.entityref.core.api; + +public interface EntityRefMaven { + +} diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefVisitor.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefVisitor.java new file mode 100644 index 000000000..d1f4cb214 --- /dev/null +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/entityref/core/api/EntityRefVisitor.java @@ -0,0 +1,7 @@ +package org.aksw.jena_sparql_api.conjure.entityref.core.api; + +public interface EntityRefVisitor { + T visit(EntityRefIdentifier entityRef); + T visit(EntityRefIri entityRef); + T visit(EntityRefMaven entityRef); +} diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/fluent/ConjureFluent.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/fluent/ConjureFluent.java index 7ea82a9dc..51b74b5c7 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/fluent/ConjureFluent.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/fluent/ConjureFluent.java @@ -9,64 +9,70 @@ import org.apache.jena.update.UpdateRequest; public interface ConjureFluent { - Op getOp(); - - // Not sure if this is the best place - // hdtHeader is a modifier for a datarefUrl - ConjureFluent hdtHeader(); - - ConjureFluent cache(); - - ConjureFluent construct(Collection queryStrs); - //ConjureFluent construct(Collection queryStrs); - - default ConjureFluent construct(String queryStr) { - return construct(Collections.singleton(queryStr)); - } - - - ConjureFluent stmts(Collection stmtStrs); - //ConjureFluent construct(Collection queryStrs); - - default ConjureFluent stmt(String stmtStr) { - return construct(Collections.singleton(stmtStr)); - } - - - ConjureFluent update(String updateRequest); - ConjureFluent views(String ... 
queryStrs); - ConjureFluent views(Collection queries); - - - ConjureFluent set(String ctxVar, String selector, String path); - - default ConjureFluent construct(Query query) { - return construct(query.toString()); - } - - default ConjureFluent update(UpdateRequest updateRequest) { - return update(updateRequest.toString()); - } - - // We could create the queries programmatically in a util function - // But we will validated them anyway with the parser - - default ConjureFluent ofProperty(String p) { - return construct("CONSTRUCT WHERE { ?s <" + p + "> ?o"); - } - - - default ConjureFluent everthing() { - return construct(QLib.everything()); - } - - - default ConjureFluent tripleCount() { - return construct(QLib.tripleCount()); - } - - default ConjureFluent compose(Function composer) { - ConjureFluent result = composer.apply(this); - return result; - } + Op getOp(); + + // Not sure if this is the best place + // hdtHeader is a modifier for a datarefUrl + ConjureFluent hdtHeader(); + + ConjureFluent cache(); + + /** + * Construct a new dataset from a sequence of construct queries + * + * @param queryStrs + * @return + */ + ConjureFluent construct(Collection queryStrs); + //ConjureFluent construct(Collection queryStrs); + + default ConjureFluent construct(String queryStr) { + return construct(Collections.singleton(queryStr)); + } + + + ConjureFluent stmts(Collection stmtStrs); + //ConjureFluent construct(Collection queryStrs); + + default ConjureFluent stmt(String stmtStr) { + return stmts(Collections.singleton(stmtStr)); + } + + + ConjureFluent update(String updateRequest); + ConjureFluent views(String ... queryStrs); + ConjureFluent views(Collection queries); + + + ConjureFluent set(String ctxVar, String selector, String path); + + default ConjureFluent construct(Query query) { + return construct(query.toString()); + } + + default ConjureFluent update(UpdateRequest updateRequest) { + return update(updateRequest.toString()); + } + + // We could create the queries programmatically in a util function + // But we will validate them anyway with the parser + + default ConjureFluent ofProperty(String p) { + return construct("CONSTRUCT WHERE { ?s <" + p + "> ?o }"); + } + + + default ConjureFluent everthing() { + return construct(QLib.everything()); + } + + + default ConjureFluent tripleCount() { + return construct(QLib.tripleCount()); + } + + default ConjureFluent compose(Function composer) { + ConjureFluent result = composer.apply(this); + return result; + } } diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/fluent/JobUtils.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/fluent/JobUtils.java index d8e6f3612..664367809 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/fluent/JobUtils.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/fluent/JobUtils.java @@ -29,104 +29,133 @@ public class JobUtils { - public static Job fromSparqlFile(String path) throws FileNotFoundException, IOException, ParseException { - // TODO Add API for Query objects to fluent - List stmts = Streams.stream(SparqlStmtUtils.processFile(DefaultPrefixes.prefixes, path)) - .collect(Collectors.toList()); - - List stmtStrs = stmts.stream() - .map(Object::toString) - .collect(Collectors.toList()); - - - //RDFDataMgrRx - //SparqlStmtUtils. 
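The removed block above and its re-indented replacement meet in JobUtils.fromSparqlFile, which parses a statement file and wraps it into a Job with the single op variable ARG. A condensed end-to-end sketch using only methods visible in this diff; sourceOp is a placeholder for whatever Op should be bound to ARG, the env value type is an assumption (generics are stripped throughout this flattened diff), and exception handling is omitted:

    import java.util.HashMap;
    import java.util.Map;
    import org.aksw.jena_sparql_api.conjure.dataset.algebra.Op;
    import org.aksw.jena_sparql_api.conjure.fluent.JobUtils;
    import org.aksw.jena_sparql_api.conjure.job.api.Job;
    import org.aksw.jena_sparql_api.conjure.job.api.JobInstance;
    import org.apache.jena.graph.Node;

    // Parse pipeline.sparql into a Job; "ARG" is the job's op variable
    Job job = JobUtils.fromSparqlFile("pipeline.sparql");

    Map<String, Node> env = new HashMap<>();   // values for the declared env vars, if any; value type assumed
    Map<String, Op> opVars = new HashMap<>();
    Op sourceOp = null;                        // placeholder: e.g. an op wrapping a DataRefUrl
    opVars.put("ARG", sourceOp);

    // Bind within the job's own model, then substitute everything into a runnable op
    JobInstance inst = JobUtils.createJobInstance(job, env, opVars);
    Op runnable = JobUtils.materializeJobInstance(inst);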
- - -// + public static Job fromSparqlFile(String path) throws FileNotFoundException, IOException, ParseException { + // TODO Add API for Query objects to fluent + List stmts = Streams.stream(SparqlStmtUtils.processFile(DefaultPrefixes.prefixes, path)) + .collect(Collectors.toList()); + + List stmtStrs = stmts.stream() + .map(Object::toString) + .collect(Collectors.toList()); + + + //RDFDataMgrRx + //SparqlStmtUtils. + + +// // List queries = RDFDataMgrEx.loadQueries(path, DefaultPrefixes.prefixes).stream() // .map(Object::toString) // .collect(Collectors.toList()); - ConjureBuilder cj = new ConjureBuilderImpl(); + ConjureBuilder cj = new ConjureBuilderImpl(); + + String opVarName = "ARG"; + Op op = cj.fromVar(opVarName).stmts(stmtStrs).getOp(); - String opVarName = "ARG"; - Op op = cj.fromVar(opVarName).stmts(stmtStrs).getOp(); - // Set vars = OpUtils.mentionedVarNames(op); // for(SparqlStmt stmt : stmts) { // System.out.println("Env vars: " + SparqlStmtUtils.mentionedEnvVars(stmt)); // } - - Map combinedMap = stmts.stream() - .map(SparqlStmtUtils::mentionedEnvVars) - .map(Map::entrySet) - .flatMap(Collection::stream) - .distinct() - .collect(Collectors.toMap(Entry::getKey, Entry::getValue)); - - Set envVars = combinedMap.keySet(); + + Map combinedMap = stmts.stream() + .map(SparqlStmtUtils::mentionedEnvVars) + .map(Map::entrySet) + .flatMap(Collection::stream) + .distinct() + .collect(Collectors.toMap(Entry::getKey, Entry::getValue)); + + Set envVars = combinedMap.keySet(); // System.out.println("All env vars: " + combinedMap); - - + + // System.out.println("MentionedVars: " + vars); - - Job result = Job.create(cj.getContext().getModel()) - .setOp(op) - .setDeclaredVars(envVars) - .setOpVars(Collections.singleton(opVarName)); - - - return result; - } - - public static JobInstance createJobInstance( - Job job, - Map env, - Map map) { - Model model = ModelFactory.createDefaultModel(); - Job j = JenaPluginUtils.copyClosureInto(job, Job.class, model); - - JobInstance result = model.createResource().as(JobInstance.class) - .setJob(j); - - result.getEnvMap().putAll(env); - - for(Entry e : map.entrySet()) { - String k = e.getKey(); - Op v = e.getValue(); - - Op vv = JenaPluginUtils.copyClosureInto(v, Op.class, model); - result.getOpVarMap().put(k, vv); - } - - return result; - } - - /** - * Return the associated op with all all variables (literals and resources) substituted - * - * @param jobInstance - * @return - */ - public static Op materializeJobInstance(JobInstance jobInstance) { - Map envMap = jobInstance.getEnvMap(); - Map opMap = jobInstance.getOpVarMap(); - - Job job = jobInstance.getJob(); - Op tmp = job.getOp(); - Op op = JenaPluginUtils.reachableClosure(tmp, Op.class); - - NodeTransform nodeTransform = x -> NodeUtils.substWithLookup2(x, envMap::get); - //NodeTransform nodeTransform = new NodeTransformRenameMap(envMap); - OpUtils.applyNodeTransform(op, nodeTransform, stmt -> SparqlStmtUtils.optimizePrefixes(SparqlStmtParserImpl.create(DefaultPrefixes.prefixes).apply(stmt))); - - // OpUtils.applyNodeTransform(); - - - //ResourceUtils.reachableClosure(root) - - Op inst = OpUtils.substituteVars(op, opMap::get); - - return inst; - } + + Job result = Job.create(cj.getContext().getModel()) + .setOp(op) + .setDeclaredVars(envVars) + .setOpVars(Collections.singleton(opVarName)); + + + return result; + } + + public static JobInstance createJobInstanceWithCopy( + Job job, + Map env, + Map map) { + Model model = ModelFactory.createDefaultModel(); + Job j = JenaPluginUtils.copyClosureInto(job, 
Job.class, model); + + JobInstance result = model.createResource().as(JobInstance.class) + .setJob(j); + + result.getEnvMap().putAll(env); + + for(Entry e : map.entrySet()) { + String k = e.getKey(); + Op v = e.getValue(); + + Op vv = JenaPluginUtils.copyClosureInto(v, Op.class, model); + result.getOpVarMap().put(k, vv); + } + + return result; + } + + /** + * Create a job instance in the same model as the job + * + * @param job + * @param env + * @param map + * @return + */ + public static JobInstance createJobInstance( + Job job, + Map env, + Map map) { + Model model = job.getModel(); + JobInstance result = model.createResource().as(JobInstance.class) + .setJob(job); + + result.getEnvMap().putAll(env); + + for(Entry e : map.entrySet()) { + String k = e.getKey(); + Op v = e.getValue(); + + Op vv = JenaPluginUtils.copyClosureInto(v, Op.class, model); + result.getOpVarMap().put(k, vv); + } + + return result; + } + + /** + * Return the associated op with all variables (literals and resources) substituted + * + * @param jobInstance + * @return + */ + public static Op materializeJobInstance(JobInstance jobInstance) { + Map envMap = jobInstance.getEnvMap(); + Map opMap = jobInstance.getOpVarMap(); + + Job job = jobInstance.getJob(); + Op tmp = job.getOp(); + Op op = JenaPluginUtils.reachableClosure(tmp, Op.class); + + NodeTransform nodeTransform = x -> NodeUtils.substWithLookup2(x, envMap::get); + //NodeTransform nodeTransform = new NodeTransformRenameMap(envMap); + OpUtils.applyNodeTransform(op, nodeTransform, stmt -> SparqlStmtUtils.optimizePrefixes(SparqlStmtParserImpl.create(DefaultPrefixes.prefixes).apply(stmt))); + + // OpUtils.applyNodeTransform(); + + + //ResourceUtils.reachableClosure(root) + + Op inst = OpUtils.substituteVars(op, opMap::get); + + return inst; + } } diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/plugin/JenaPluginConjure.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/plugin/JenaPluginConjure.java index 297b40b44..bbd1e6daf 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/plugin/JenaPluginConjure.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/plugin/JenaPluginConjure.java @@ -2,9 +2,28 @@ import org.aksw.dcat.ap.domain.api.Checksum; import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRef; +import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRefCatalog; +import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRefDcat; +import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRefEmpty; +import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRefExt; +import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRefGit; +import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRefOp; +import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRefSparqlEndpoint; +import org.aksw.jena_sparql_api.conjure.dataref.rdf.api.DataRefUrl; import org.aksw.jena_sparql_api.conjure.job.api.Job; +import org.aksw.jena_sparql_api.conjure.job.api.JobBinding; +import org.aksw.jena_sparql_api.conjure.job.api.JobInstance; +import org.aksw.jena_sparql_api.conjure.job.api.Macro; +import org.aksw.jena_sparql_api.conjure.job.api.MacroParam; import org.aksw.jena_sparql_api.conjure.resourcespec.ResourceSpec; +import org.aksw.jena_sparql_api.conjure.resourcespec.ResourceSpecInline; +import org.aksw.jena_sparql_api.conjure.resourcespec.ResourceSpecUrl; +import org.aksw.jena_sparql_api.conjure.traversal.api.OpPropertyPath; import 
org.aksw.jena_sparql_api.conjure.traversal.api.OpTraversal; +import org.aksw.jena_sparql_api.conjure.traversal.api.OpTraversal0; +import org.aksw.jena_sparql_api.conjure.traversal.api.OpTraversal1; +import org.aksw.jena_sparql_api.conjure.traversal.api.OpTraversal2; +import org.aksw.jena_sparql_api.conjure.traversal.api.OpTraversalSelf; import org.aksw.jena_sparql_api.http.domain.api.RdfEntityInfoDefault; import org.aksw.jena_sparql_api.io.hdt.JenaPluginHdt; import org.aksw.jena_sparql_api.mapper.proxy.JenaPluginUtils; @@ -12,28 +31,73 @@ import org.apache.jena.sys.JenaSubsystemLifecycle; public class JenaPluginConjure - implements JenaSubsystemLifecycle { - - public void start() { - init(); - } - - @Override - public void stop() { - } - - - public static void init() { - TurtleWriterNoBase.register(); - JenaPluginHdt.init(); - - JenaPluginUtils.scan(Job.class); - JenaPluginUtils.scan(OpTraversal.class); - JenaPluginUtils.scan(RdfEntityInfoDefault.class); - JenaPluginUtils.scan(Checksum.class); - JenaPluginUtils.scan(DataRef.class); - JenaPluginUtils.scan(org.aksw.jena_sparql_api.conjure.entity.algebra.Op.class); - JenaPluginUtils.scan(org.aksw.jena_sparql_api.conjure.dataset.algebra.Op.class); - JenaPluginUtils.scan(ResourceSpec.class); - } + implements JenaSubsystemLifecycle { + + public void start() { + init(); + } + + @Override + public void stop() { + } + + + public static void init() { + TurtleWriterNoBase.register(); + JenaPluginHdt.init(); + + JenaPluginUtils.registerResourceClasses( + Job.class, JobBinding.class, JobInstance.class, Macro.class, + MacroParam.class); + + JenaPluginUtils.registerResourceClasses( + OpTraversal.class, OpTraversal0.class, OpTraversal1.class, + OpTraversal2.class, OpTraversalSelf.class, OpPropertyPath.class); + + JenaPluginUtils.registerResourceClasses(RdfEntityInfoDefault.class); + + JenaPluginUtils.registerResourceClasses(Checksum.class); + + JenaPluginUtils.registerResourceClasses( + DataRef.class, DataRefCatalog.class, DataRefDcat.class, DataRefEmpty.class, DataRefExt.class, + DataRefGit.class, DataRefOp.class, DataRefSparqlEndpoint.class, DataRefUrl.class); + + JenaPluginUtils.registerResourceClasses( + org.aksw.jena_sparql_api.conjure.entity.algebra.Op.class, + org.aksw.jena_sparql_api.conjure.entity.algebra.Op0.class, + org.aksw.jena_sparql_api.conjure.entity.algebra.Op1.class, + org.aksw.jena_sparql_api.conjure.entity.algebra.OpCode.class, + org.aksw.jena_sparql_api.conjure.entity.algebra.OpConvert.class, + org.aksw.jena_sparql_api.conjure.entity.algebra.OpPath.class, + org.aksw.jena_sparql_api.conjure.entity.algebra.OpValue.class + ); + + JenaPluginUtils.registerResourceClasses( + org.aksw.jena_sparql_api.conjure.dataset.algebra.Op.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.Op1.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.Op2.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpCoalesce.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpConstruct.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpData.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpDataRefResource.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpError.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpHdtHeader.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpJobInstance.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpMacroCall.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpN.class, +// org.aksw.jena_sparql_api.conjure.dataset.algebra.OpNothing.class, + 
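A brief aside before the registration list resumes below: replacing the earlier JenaPluginUtils.scan(...) calls with explicit registerResourceClasses(...) calls trades classpath scanning for a deterministic, shading-friendly startup, at the cost of listing every class (OpNothing above is commented out, presumably deliberately). User code can extend the mapping the same way; a hedged sketch for a hypothetical resource view (proxy-mapping annotations such as @IriNs are omitted for brevity and may be required in practice):

    import org.aksw.jena_sparql_api.mapper.proxy.JenaPluginUtils;
    import org.apache.jena.rdf.model.Resource;

    // Hypothetical Resource-backed view; not part of this commit
    public interface MyJobNote extends Resource {
        String getComment();
        MyJobNote setComment(String comment);
    }

    // One explicit call, same pattern as the registrations in this plugin
    JenaPluginUtils.registerResourceClasses(MyJobNote.class);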
org.aksw.jena_sparql_api.conjure.dataset.algebra.OpPersist.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpQueryOverViews.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpSequence.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpSet.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpStmtList.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpUnion.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpUpdateRequest.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpVar.class, + org.aksw.jena_sparql_api.conjure.dataset.algebra.OpWhen.class + ); + + JenaPluginUtils.registerResourceClasses( + ResourceSpec.class, ResourceSpecInline.class, ResourceSpecUrl.class); + } } diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/utils/HttpHeaderUtils.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/utils/HttpHeaderUtils.java index 717fe46c7..44a2ae947 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/utils/HttpHeaderUtils.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/conjure/utils/HttpHeaderUtils.java @@ -26,207 +26,207 @@ import com.google.common.net.MediaType; public class HttpHeaderUtils { - - public static Entry<String, String> toEntry(Header header) { - Entry<String, String> result = Maps.immutableEntry( - header.getName(), - header.getValue()); - return result; - } - - public static Stream<Entry<String, String>> toEntries(Header[] headers) { - return Arrays.asList(headers).stream().map(HttpHeaderUtils::toEntry); - } - - - public static Header[] mergeHeaders(Header[] headers, String name) { - List<Header> affectedHeaders = streamHeaders(headers, name) - .collect(Collectors.toList()); - - List<HeaderElement> parts = getElements(affectedHeaders.toArray(new Header[] {}), name) - .collect(Collectors.toList()); - - String mergedStr = parts.stream() - .map(Objects::toString) - .collect(Collectors.joining(",")); - - Header[] result = new Header[headers.length - (affectedHeaders.isEmpty() ? 0 : affectedHeaders.size() - 1)]; - - boolean isHeaderEmitted = false; - for(int i = 0, j = 0; i < headers.length; ++i) { - Header h = headers[i]; - String headerName = h.getName(); - - if(headerName.equalsIgnoreCase(name)) { - if(!isHeaderEmitted) { - result[j++] = new BasicHeader(headerName, mergedStr); - isHeaderEmitted = true; - } - // else skip - } else { - result[j++] = headers[i]; - } - } - - return result; - } - - public static float qValueOf(HeaderElement h) { - float result = Optional.ofNullable(h.getParameterByName("q")) - .map(NameValuePair::getValue) - .map(Float::parseFloat) - .orElse(1.0f); - return result; - } - - public static Stream<Header> streamHeaders(Header[] headers) { - Stream<Header> result = headers == null ? Stream.empty() : - Arrays.asList(headers).stream(); - - return result; - } - - public static Stream<Header> streamHeaders(Header[] headers, String name) { - Stream<Header> result = streamHeaders(headers) - .filter(h -> h.getName().equalsIgnoreCase(name)); - - return result; - } - - public static Stream<HeaderElement> getElements(Header[] headers) { - Stream<HeaderElement> result = streamHeaders(headers) - .flatMap(h -> Arrays.asList(h.getElements()).stream()); - - return result; - } - - public static Stream<HeaderElement> getElements(Header[] headers, String name) { - Stream<HeaderElement> result = streamHeaders(headers) - .filter(Objects::nonNull) - .filter(h -> h.getName().equalsIgnoreCase(name)) - .flatMap(h -> Arrays.asList(h.getElements()).stream()); - - return result; - } - - /** - * TODO Ensure the result is stable; the javadoc for .sorted does not seem to guarantee this - * - * @param headers - * @param name - * @return A linked hash map with items inserted in the order of their q value - */ - public static Map<String, Float> getOrderedValues(Header[] headers, String name) { - Map<String, Float> result = getElements(headers, name) - .map(e -> Maps.immutableEntry(e.getName(), qValueOf(e))) - .sorted((a, b) -> a.getValue().compareTo(b.getValue())) - .collect(CollectorUtils.toLinkedHashMap(Entry::getKey, Entry::getValue)); - return result; - } - - - public static String getValueOrNull(Header header) { - List<String> values = header == null - ? null - : getValues(new Header[] {header}, header.getName()); - - if(values != null && values.size() > 1) { - throw new RuntimeException("At most 1 value expected, got: " + values); - } - - String result = values == null ? null : values.get(0); - return result; - } - - - public static String getValue(Header[] headers, String name) { - List<String> contentTypes = getValues(headers, name); - if(contentTypes.size() != 1) { - throw new RuntimeException("Exactly one content type expected, got: " + contentTypes); - } - - return contentTypes.get(0); - } - - public static List<String> getValues(Header header, String name) { - List<String> result = getValues(new Header[] { header }, name); - return result; - } - - public static List<String> getValues(Header[] headers, String name) { - List<String> result = getElements(headers, name) - .map(HeaderElement::getName) - .collect(Collectors.toList()); - - return result; - } - - public static RdfEntityInfo copyMetaData(HttpEntity src, RdfEntityInfo tgt) { - tgt = tgt != null - ? tgt - : ModelFactory.createDefaultModel().createResource().as(RdfEntityInfo.class); - - List<String> encodings = getValues(src.getContentEncoding(), HttpHeaders.CONTENT_ENCODING); - String ct = getValueOrNull(src.getContentType()); - - tgt.setContentType(ct); - tgt.setContentEncodings(encodings); - tgt.setContentLength(src.getContentLength()); - - return tgt; - } - - - - // Bridge between rdf model and apache http components - public static Header[] toHeaders(RdfEntityInfo info) { - Header[] result = new Header[] { - // TODO Add charset argument to content type header if info.getCharset() is non null - new BasicHeader(HttpHeaders.CONTENT_TYPE, info.getContentType()), - new BasicHeader(HttpHeaders.CONTENT_ENCODING, info.getEncodingsAsHttpHeader()) - }; - - return result; - } - - // TODO Move to some jena http utils - - - - public static List<MediaType> supportedMediaTypes() { - Collection<Lang> langs = RDFLanguages.getRegisteredLanguages(); - List<MediaType> result = supportedMediaTypes(langs); - return result; - } - - public static List<String> langToContentTypes(Lang lang) { - List<String> result = Stream.concat( - Stream.of(lang.getContentType().getContentType()), - lang.getAltContentTypes().stream()) - .distinct() - .collect(Collectors.toList()); - - return result; - } - - - public static List<MediaType> langToMediaTypes(Lang lang) { - List<MediaType> result = langToContentTypes(lang).stream() - .map(MediaType::parse) - .collect(Collectors.toList()); - - return result; - } - - public static List<MediaType> supportedMediaTypes(Collection<Lang> langs) { - List<MediaType> types = langs.stream() - // Models can surely be served using based languages - // TODO but what about quad based formats? I guess its fine to serve a quad based dataset - // with only a default graph - //.filter(RDFLanguages::isTriples) - .flatMap(lang -> langToMediaTypes(lang).stream()) - .collect(Collectors.toList()); - return types; - } - + + public static Entry<String, String> toEntry(Header header) { + Entry<String, String> result = Maps.immutableEntry( + header.getName(), + header.getValue()); + return result; + } + + public static Stream<Entry<String, String>> toEntries(Header[] headers) { + return Arrays.asList(headers).stream().map(HttpHeaderUtils::toEntry); + } + + + public static Header[] mergeHeaders(Header[] headers, String name) { + List<Header> affectedHeaders = streamHeaders(headers, name) + .collect(Collectors.toList()); + + List<HeaderElement> parts = getElements(affectedHeaders.toArray(new Header[] {}), name) + .collect(Collectors.toList()); + + String mergedStr = parts.stream() + .map(Objects::toString) + .collect(Collectors.joining(",")); + + Header[] result = new Header[headers.length - (affectedHeaders.isEmpty() ? 0 : affectedHeaders.size() - 1)]; + + boolean isHeaderEmitted = false; + for(int i = 0, j = 0; i < headers.length; ++i) { + Header h = headers[i]; + String headerName = h.getName(); + + if(headerName.equalsIgnoreCase(name)) { + if(!isHeaderEmitted) { + result[j++] = new BasicHeader(headerName, mergedStr); + isHeaderEmitted = true; + } + // else skip + } else { + result[j++] = headers[i]; + } + } + + return result; + } + + public static float qValueOf(HeaderElement h) { + float result = Optional.ofNullable(h.getParameterByName("q")) + .map(NameValuePair::getValue) + .map(Float::parseFloat) + .orElse(1.0f); + return result; + } + + public static Stream<Header> streamHeaders(Header[] headers) { + Stream<Header> result = headers == null ? Stream.empty() : + Arrays.asList(headers).stream(); + + return result; + } + + public static Stream<Header> streamHeaders(Header[] headers, String name) { + Stream<Header> result = streamHeaders(headers) + .filter(h -> h.getName().equalsIgnoreCase(name)); + + return result; + } + + public static Stream<HeaderElement> getElements(Header[] headers) { + Stream<HeaderElement> result = streamHeaders(headers) + .flatMap(h -> Arrays.asList(h.getElements()).stream()); + + return result; + } + + public static Stream<HeaderElement> getElements(Header[] headers, String name) { + Stream<HeaderElement> result = streamHeaders(headers) + .filter(Objects::nonNull) + .filter(h -> h.getName().equalsIgnoreCase(name)) + .flatMap(h -> Arrays.asList(h.getElements()).stream()); + + return result; + } + + /** + * TODO Ensure the result is stable; the javadoc for .sorted does not seem to guarantee this + * + * @param headers + * @param name + * @return A linked hash map with items inserted in the order of their q value + */ + public static Map<String, Float> getOrderedValues(Header[] headers, String name) { + Map<String, Float> result = getElements(headers, name) + .map(e -> Maps.immutableEntry(e.getName(), qValueOf(e))) + .sorted((a, b) -> a.getValue().compareTo(b.getValue())) + .collect(CollectorUtils.toLinkedHashMap(Entry::getKey, Entry::getValue)); + return result; + } + + + public static String getValueOrNull(Header header) { + List<String> values = header == null + ? null + : getValues(new Header[] {header}, header.getName()); + + if(values != null && values.size() > 1) { + throw new RuntimeException("At most 1 value expected, got: " + values); + } + + // Guard against an empty element list, not just a null one + String result = values == null || values.isEmpty() ? null : values.get(0); + return result; + } + + + public static String getValue(Header[] headers, String name) { + List<String> contentTypes = getValues(headers, name); + if(contentTypes.size() != 1) { + throw new RuntimeException("Exactly one content type expected, got: " + contentTypes); + } + + return contentTypes.get(0); + } + + public static List<String> getValues(Header header, String name) { + List<String> result = getValues(new Header[] { header }, name); + return result; + } + + public static List<String> getValues(Header[] headers, String name) { + List<String> result = getElements(headers, name) + .map(HeaderElement::getName) + .collect(Collectors.toList()); + + return result; + } + + public static RdfEntityInfo copyMetaData(HttpEntity src, RdfEntityInfo tgt) { + tgt = tgt != null + ? 
tgt + : ModelFactory.createDefaultModel().createResource().as(RdfEntityInfo.class); + + List<String> encodings = getValues(src.getContentEncoding(), HttpHeaders.CONTENT_ENCODING); + String ct = getValueOrNull(src.getContentType()); + + tgt.setContentType(ct); + tgt.setContentEncodings(encodings); +// tgt.setContentLength(src.getContentLength()); + + return tgt; + } + + + + // Bridge between rdf model and apache http components + public static Header[] toHeaders(RdfEntityInfo info) { + Header[] result = new Header[] { + // TODO Add charset argument to content type header if info.getCharset() is non null + new BasicHeader(HttpHeaders.CONTENT_TYPE, info.getContentType()), + new BasicHeader(HttpHeaders.CONTENT_ENCODING, info.getEncodingsAsHttpHeader()) + }; + + return result; + } + + // TODO Move to some jena http utils + + + + public static List<MediaType> supportedMediaTypes() { + Collection<Lang> langs = RDFLanguages.getRegisteredLanguages(); + List<MediaType> result = supportedMediaTypes(langs); + return result; + } + + public static List<String> langToContentTypes(Lang lang) { + List<String> result = Stream.concat( + Stream.of(lang.getContentType().getContentType()), + lang.getAltContentTypes().stream()) + .distinct() + .collect(Collectors.toList()); + + return result; + } + + + public static List<MediaType> langToMediaTypes(Lang lang) { + List<MediaType> result = langToContentTypes(lang).stream() + .map(MediaType::parse) + .collect(Collectors.toList()); + + return result; + } + + public static List<MediaType> supportedMediaTypes(Collection<Lang> langs) { + List<MediaType> types = langs.stream() + // Models can surely be served using based languages + // TODO but what about quad based formats? I guess its fine to serve a quad based dataset + // with only a default graph + //.filter(RDFLanguages::isTriples) + .flatMap(lang -> langToMediaTypes(lang).stream()) + .collect(Collectors.toList()); + return types; + } + } diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/domain/api/RdfEntityInfo.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/domain/api/RdfEntityInfo.java index dcb5a7f08..91c6f20fb 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/domain/api/RdfEntityInfo.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/domain/api/RdfEntityInfo.java @@ -8,25 +8,25 @@ import org.apache.jena.rdf.model.Resource; public interface RdfEntityInfo - extends Resource, EntityInfoCore + extends Resource, EntityInfoCore { - RdfEntityInfo setContentEncodings(List<String> enocdings); - RdfEntityInfo setContentType(String contentType); - RdfEntityInfo setCharset(String charset); - RdfEntityInfo setContentLength(Long length); - - Collection<Checksum> getHashes(); - + RdfEntityInfo setContentEncodings(List<String> encodings); + RdfEntityInfo setContentType(String contentType); + RdfEntityInfo setCharset(String charset); +// RdfEntityInfo setContentLength(Long length); + + Collection<Checksum> getHashes(); + // @ToString // default $toString() { -// +// // } - - default Checksum getHash(String algo) { - Checksum result = getHashes().stream() - .filter(x -> algo.equalsIgnoreCase(x.getAlgorithm())) - .findAny() - .orElse(null); - return result; - } + + default Checksum getHash(String algo) { + Checksum result = getHashes().stream() + .filter(x -> algo.equalsIgnoreCase(x.getAlgorithm())) + .findAny() + .orElse(null); + return result; + } }
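The HttpHeaderUtils block above is the content-negotiation workhorse: getElements flattens (possibly repeated) headers into HeaderElements, qValueOf reads the q parameter with a default of 1.0, and getOrderedValues collects values into a LinkedHashMap sorted by ascending q, so the lowest preference comes first (that is what the comparator yields; see the stability TODO there). A small self-contained probe against Apache HttpComponents 4.x:

    import java.util.Map;
    import org.apache.http.Header;
    import org.apache.http.HttpHeaders;
    import org.apache.http.message.BasicHeader;
    import org.aksw.jena_sparql_api.conjure.utils.HttpHeaderUtils;

    public class QValueDemo {
        public static void main(String[] args) {
            Header[] headers = new Header[] {
                new BasicHeader(HttpHeaders.ACCEPT, "text/turtle;q=0.8, application/n-triples")
            };

            // Prints text/turtle -> 0.8 before application/n-triples -> 1.0
            Map<String, Float> ordered = HttpHeaderUtils.getOrderedValues(headers, HttpHeaders.ACCEPT);
            ordered.forEach((value, q) -> System.out.println(value + " -> " + q));
        }
    }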
diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/domain/api/RdfEntityInfoDefault.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/domain/api/RdfEntityInfoDefault.java index 31131c87c..87fd097bd 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/domain/api/RdfEntityInfoDefault.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/domain/api/RdfEntityInfoDefault.java @@ -21,10 +21,9 @@ public interface RdfEntityInfoDefault @Override String getContentType(); - -// @IriNs("eg") -// @Override -// Long getContentLength(); +// @IriNs("eg") +// @Override +// Long getContentLength(); /** * Charset, such as UTF-8 or ISO 8859-1 diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/RdfHttpEntity.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/RdfHttpEntity.java index 3777dfe48..957476bbc 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/RdfHttpEntity.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/RdfHttpEntity.java @@ -6,7 +6,11 @@ import org.apache.jena.rdf.model.Resource; public interface RdfHttpEntity { - Resource getCombinedInfo(); + // RdfHttpResourceFile getHttpResource(); - InputStream open() throws IOException; + Resource getCombinedInfo(); + + InputStream open() throws IOException; + +// WriteProgress put(InputStream in); } diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/RdfHttpResourceFile.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/RdfHttpResourceFile.java index 5c11dd4d2..ddfdcfcd5 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/RdfHttpResourceFile.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/RdfHttpResourceFile.java @@ -7,39 +7,41 @@ /** * A resource is identified by an ID and can host multiple content entities. - * - * @author raven * + * @author raven */ public interface RdfHttpResourceFile { - - RdfHttpResourceFile resolve(String path); - - ResourceStore getResourceStore(); - Collection<RdfHttpEntityFile> getEntities(); - - Path getRelativePath(); - - default Path getAbsolutePath() { - Path relPath = getRelativePath(); - Path parentAbsPath = getResourceStore().getAbsolutePath(); - Path result = parentAbsPath.resolve(relPath); - - return result; - } - - - /** - * Get or create an entity that matches the given description. - * This is typically done based on an RDF description corresponding to HTTP accept headers: - * accept, accept-encoding, accept-charset, accept-language. - * However, the design intentionally allows for custom resolution mechanisms. - * - * TODO Add the RFC number(s) the resolution mechanism should adhere to - * - * - * @param description - * @return - */ - RdfHttpEntityFile allocate(Resource description); + + ResourceStore getResourceStore(); + Collection<RdfHttpEntityFile> getEntities(); + + /** The relative path that backs this http resource */ + Path getRelativePath(); + + /** Return the http resource at the given path relative to {@link #getRelativePath()}. TODO Clarify whether the path must not resolve to an ancestor folder of this entity's path */ + RdfHttpResourceFile resolve(String path); + + default Path getAbsolutePath() { + Path relPath = getRelativePath(); + Path parentAbsPath = getResourceStore().getAbsolutePath(); + Path result = parentAbsPath.resolve(relPath); + + return result; + } + + + /** + * Get or create an entity that matches the given description. + * This is typically done based on an RDF description corresponding to HTTP accept headers: + * accept, accept-encoding, accept-charset, accept-language. + * However, the design intentionally allows for custom resolution mechanisms. + * + * TODO Add the RFC number(s) the resolution mechanism should adhere to + * + * + * @param description + * @return + */ + RdfHttpEntityFile allocate(Resource description); }
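allocate above is driven by an RDF description rather than raw accept headers; the RdfEntityInfo view from this module is the natural way to build one. A sketch, assuming JenaPluginConjure has been initialized (so the .as(...) view works) and that res is an already-obtained RdfHttpResourceFile, for example via a ResourceStore as shown in HttpResourceRepositoryFromFileSystemImpl further below:

    import java.util.Collections;
    import org.aksw.jena_sparql_api.http.domain.api.RdfEntityInfo;
    import org.aksw.jena_sparql_api.http.repository.api.RdfHttpEntityFile;
    import org.apache.jena.rdf.model.ModelFactory;

    // Describe the desired representation: gzipped Turtle
    RdfEntityInfo desc = ModelFactory.createDefaultModel()
            .createResource().as(RdfEntityInfo.class)
            .setContentType("text/turtle")
            .setContentEncodings(Collections.singletonList("gzip"));

    // Get or create a matching entity under this resource
    RdfHttpEntityFile entity = res.allocate(desc);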
diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/WriteProgress.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/WriteProgress.java new file mode 100644 index 000000000..5910b6f77 --- /dev/null +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/api/WriteProgress.java @@ -0,0 +1,17 @@ +package org.aksw.jena_sparql_api.http.repository.api; + +import java.util.concurrent.TimeUnit; + +public interface WriteProgress { + /** Abort the process; no effect if already completed */ + void abort(); + + boolean isAborted(); + boolean isFinished(); + + /** Block until the process completes, or at most for the given amount of time */ + void awaitFinished(Long amount, TimeUnit timeUnit); + + /** Obtain the number of bytes written */ + long getBytesWritten(); +} diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/impl/HttpResourceRepositoryFromFileSystemImpl.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/impl/HttpResourceRepositoryFromFileSystemImpl.java index 4ff95fce1..7a0683bab 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/impl/HttpResourceRepositoryFromFileSystemImpl.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/impl/HttpResourceRepositoryFromFileSystemImpl.java @@ -31,6 +31,7 @@ import org.aksw.jena_sparql_api.conjure.utils.HttpHeaderUtils; import org.aksw.jena_sparql_api.http.domain.api.RdfEntityInfo; import org.aksw.jena_sparql_api.http.repository.api.HttpResourceRepositoryFromFileSystem; +import org.aksw.jena_sparql_api.http.repository.api.RdfHttpEntity; import org.aksw.jena_sparql_api.http.repository.api.RdfHttpEntityFile; import org.aksw.jena_sparql_api.http.repository.api.RdfHttpResourceFile; import org.aksw.jena_sparql_api.http.repository.api.ResourceStore; @@ -64,331 +65,351 @@ import com.google.common.net.MediaType; public class HttpResourceRepositoryFromFileSystemImpl - implements HttpResourceRepositoryFromFileSystem + implements HttpResourceRepositoryFromFileSystem { - private static final Logger logger = LoggerFactory.getLogger(HttpResourceRepositoryFromFileSystemImpl.class); - - protected ResourceStore downloadStore; - protected ResourceStore cacheStore; - protected ResourceStore hashStore; - - public HttpResourceRepositoryFromFileSystemImpl() { - super(); - } - - public static Path hashToRelPath(String hash) { - List<String> parts = Splitter - .fixedLength(8) - .splitToList(hash); - - String id = parts.stream() - .collect(Collectors.joining("/")); - - Path result = Paths.get(id); - return result; - } - - public static 
HttpResourceRepositoryFromFileSystemImpl create(Path absBasePath) { - HttpResourceRepositoryFromFileSystemImpl result = new HttpResourceRepositoryFromFileSystemImpl(); - result.setDownloadStore(new ResourceStoreImpl(absBasePath.resolve("downloads"))); - result.setCacheStore(new ResourceStoreImpl(absBasePath.resolve("cache"))); - - result.setHashStore(new ResourceStoreImpl(absBasePath.resolve("hash"), HttpResourceRepositoryFromFileSystemImpl::hashToRelPath)); - - return result; - } - - - public Collection getResourceStores() { - return Arrays.asList(downloadStore, cacheStore, hashStore); - } - - public ResourceStore getDownloadStore() { - return downloadStore; - } - - public void setDownloadStore(ResourceStore downloadStore) { - this.downloadStore = downloadStore; - } - - public ResourceStore getCacheStore() { - return cacheStore; - } - - public void setCacheStore(ResourceStore cacheStore) { - this.cacheStore = cacheStore; - } - - public ResourceStore getHashStore() { - return hashStore; - } - - public void setHashStore(ResourceStore hashStore) { - this.hashStore = hashStore; - } - - - public ResourceStore getStoreByPath(Path path) { - ResourceStore result = getResourceStores().stream() - .filter(store -> store.contains(path)) - .findFirst() - .orElse(null); - - return result; - } - - public Resource getInfo(Path path) { - Resource result = Optional.ofNullable(getStoreByPath(path)).map(store -> store.getInfo(path)) - .orElse(null); - return result; - } - - - public List readSymbolicLinkTransitive(Path absPath) { - - Set seen = new LinkedHashSet<>(); - - // tentative result list, may be set to null - // if the link resolves to a non-existent target - List result = new ArrayList<>(); - - while(Files.isSymbolicLink(absPath)) { - if(!Files.exists(absPath)) { - result = null; - break; - } - - if(seen.contains(absPath)) { - throw new RuntimeException("Cyclic symbolic link detected: " + seen); - } - seen.add(absPath); - - result.add(absPath); - - Path tmpPath; - try { - tmpPath = Files.readSymbolicLink(absPath); - } catch (IOException e) { - throw new RuntimeException(e); - } - absPath = absPath.getParent().resolve(tmpPath).normalize(); - } - - if(result != null) { - result.add(absPath); - Collections.reverse(result); - } - - return result; - } - - - // Assumes that there is at most 1 repository associated with a given path - public RdfHttpEntityFile getEntityForPath(Path path) { - List resolvedPaths = readSymbolicLinkTransitive(path); - Collection stores = getResourceStores(); - - RdfHttpEntityFile result = null; - - outer: for(Path resolvedPath : resolvedPaths) { - for(ResourceStore store : stores) { - result = store.getEntityForPath(resolvedPath); - if(result != null) { - break outer; - } - } - } - - return result; - } - - public Collection getEntities(String uri) { - Collection stores = getResourceStores(); - - Collection result = stores.stream() - .map(store -> store.getResource(uri)) - .flatMap(res -> res.getEntities().stream()) - //.flatMap(store -> store.listEntities(relPath).stream()) - .collect(Collectors.toList()); - return result; - } - - public static BasicHttpRequest createRequest(String url, String contentType, List encodings) { - BasicHttpRequest result = new BasicHttpRequest("GET", url); - result.setHeader(HttpHeaders.ACCEPT, contentType); - - List effectiveEncodings = new ArrayList<>(encodings); - if(!encodings.contains(IDENTITY_ENCODING) && !encodings.isEmpty()) { - effectiveEncodings.add("identity;q=0"); - } - - String encoding = 
effectiveEncodings.stream().collect(Collectors.joining(",")); - - result.setHeader(HttpHeaders.ACCEPT_ENCODING, encoding); - - return result; - } - - /** - * Convenience method for requesting a resource with given content type and encodingsS - * @param url - * @param contentType - * @param encodings - * @return - * @throws IOException - */ - public static RdfHttpEntityFile get(HttpResourceRepositoryFromFileSystem repo, String url, String contentType, List encodings) throws IOException { - BasicHttpRequest request = createRequest(url, contentType, encodings); - - RdfHttpEntityFile result = repo.get(request, HttpResourceRepositoryFromFileSystemImpl::resolveRequest); - return result; - } - - - public String bestEncoding(Collection encodings) { - PathCoderRegistry registry = PathCoderRegistry.get(); - String result = encodings.stream() - .filter(enc -> registry.getCoder(enc) != null) - .findFirst() - .orElse(null); - - return result; - } - - public MediaType bestContentType(Collection contentTypes) { - List supportedMediaTypes = HttpHeaderUtils.supportedMediaTypes(); - - MediaType result = contentTypes.stream() - .flatMap(range -> supportedMediaTypes.stream() - .filter(supportedMt -> supportedMt.is(range))) - .findFirst() - .orElse(null); - - return result; - } - - public static final String IDENTITY_ENCODING = "identity"; - - static class Plan { - protected Op op; - protected RdfEntityInfo info; - - public Plan(Op op, RdfEntityInfo info) { - super(); - this.op = op; - this.info = info; - } - - public Op getOp() { - return op; - } - - public RdfEntityInfo getInfo() { - return info; - } - } - - public Plan findBestPlanToServeRequest(HttpRequest request, - Collection entities, - OpExecutor opExecutor) throws IOException { - Header[] headers = request.getAllHeaders(); - - List supportedContentTypes = HttpHeaderUtils.supportedMediaTypes(); - Collection supportedEncodings = new ArrayList<>(Arrays.asList(IDENTITY_ENCODING)); - supportedEncodings.addAll(PathCoderRegistry.get().getCoderNames()); - - - // Get the requested content types in order of preference - Map requestedContentTypeRanges = HttpHeaderUtils.getOrderedValues(headers, HttpHeaders.ACCEPT).entrySet().stream() - .collect(Collectors.toMap(e -> MediaType.parse(e.getKey()), Entry::getValue)); - - // Get the requested encodings in order of preference - Map requestedEncodings = HttpHeaderUtils.getOrderedValues(headers, HttpHeaders.ACCEPT_ENCODING); - - if(!requestedEncodings.containsKey(IDENTITY_ENCODING)) { - requestedEncodings.put(IDENTITY_ENCODING, 1f); - } - - - // Filter the supported media types by the requested ones - // The supported media type must match a range in the headers whose score is greater than 0 - Map candidateTargetContentTypes = requestedContentTypeRanges.entrySet().stream() - .filter(rangeEntry -> rangeEntry.getValue() > 0) - .flatMap(rangeEntry -> supportedContentTypes.stream() - .filter(supported -> supported.is(rangeEntry.getKey())) - .map(supported -> Maps.immutableEntry(supported, rangeEntry.getValue()))) - .sorted((a, b) -> a.getValue().compareTo(b.getValue())) - .collect(Collectors.toMap(Entry::getKey, Entry::getValue, (a, b) -> a, LinkedHashMap::new)); - - Map candidateEncodings = requestedEncodings.entrySet().stream() - .filter(entry -> entry.getValue() > 0) - .flatMap(entry -> supportedEncodings.stream() - .filter(supported -> supported.equalsIgnoreCase(entry.getKey())) - .map(supported -> Maps.immutableEntry(supported, entry.getValue()))) - .sorted((a, b) -> a.getValue().compareTo(b.getValue())) - 
.collect(Collectors.toMap(Entry::getKey, Entry::getValue, (a, b) -> a, LinkedHashMap::new)); - - - // TODO Abstract the cartesian product so we can extend to any number of dimensions - //Table entityToPlanToScore = HashBasedTable.create(); - Multimap> entityToPlan = HashMultimap.create(); - - for(RdfHttpEntityFile entity : entities) { - // TODO Ensure entities are valid - // - e.g. manual deletion of files in the http cache can cause corruption - - RdfEntityInfo info = entity.getCombinedInfo().as(RdfEntityInfo.class); - //MediaType mt = MediaType.parse(info.getContentType()); - - for(Entry e : candidateTargetContentTypes.entrySet()) { - String tgtContentType = e.getKey().toString(); - Float tgtContentTypeScore = e.getValue(); - - for(Entry f : candidateEncodings.entrySet()) { - String tgtEncoding = f.getKey(); - Float tgtEncodingScore = f.getValue(); - - List tgtEncodings = tgtEncoding.equalsIgnoreCase(IDENTITY_ENCODING) - ? Collections.emptyList() - : Arrays.asList(tgtEncoding); - - Op op = Planner.createPlan(entity, tgtContentType, tgtEncodings); - if(op != null) { - op = opExecutor.optimizeInPlace(op); - - - int numOps = ResourceTreeUtils.getNumOps(op, Op::getChildren); - - RdfEntityInfo meta = ModelFactory.createDefaultModel().createResource() - .as(RdfEntityInfo.class) - .setContentType(tgtContentType) - .setContentEncodings(tgtEncodings); - - Plan plan = new Plan(op, meta); - - Entry planAndScore = Maps.immutableEntry(plan, (float)numOps); - - entityToPlan.put(entity, planAndScore); - } - } - } - } - - Entry> entry = entityToPlan.entries().stream() - .sorted((a, b) -> a.getValue().getValue().compareTo(b.getValue().getValue())) - .findFirst() - .orElse(null); - - Plan result = entry == null ? null : entry.getValue().getKey(); - - return result; - - //Map candidateToScore = new HashMap<>(); - - - // TODO Find best candidate among the file entities - -// + private static final Logger logger = LoggerFactory.getLogger(HttpResourceRepositoryFromFileSystemImpl.class); + + protected ResourceStore downloadStore; + protected ResourceStore cacheStore; + protected ResourceStore hashStore; + + public HttpResourceRepositoryFromFileSystemImpl() { + super(); + } + + /** + * Utility method to convert a hash value (e.g. 
sha256) to a relative path + * by splitting after 'n' characters + * + * @param hash + * @return + */ + public static Path hashToRelPath(String hash) { + List parts = Splitter + .fixedLength(8) + .splitToList(hash); + + String id = parts.stream() + .collect(Collectors.joining("/")); + + Path result = Paths.get(id); + return result; + } + + public static HttpResourceRepositoryFromFileSystemImpl create(Path absBasePath) { + HttpResourceRepositoryFromFileSystemImpl result = new HttpResourceRepositoryFromFileSystemImpl(); + result.setDownloadStore(new ResourceStoreImpl(absBasePath.resolve("downloads"))); + result.setCacheStore(new ResourceStoreImpl(absBasePath.resolve("cache"))); + + result.setHashStore(new ResourceStoreImpl(absBasePath.resolve("hash"), HttpResourceRepositoryFromFileSystemImpl::hashToRelPath)); + + return result; + } + + + public Collection getResourceStores() { + return Arrays.asList(downloadStore, cacheStore, hashStore); + } + + public ResourceStore getDownloadStore() { + return downloadStore; + } + + public void setDownloadStore(ResourceStore downloadStore) { + this.downloadStore = downloadStore; + } + + public ResourceStore getCacheStore() { + return cacheStore; + } + + public void setCacheStore(ResourceStore cacheStore) { + this.cacheStore = cacheStore; + } + + public ResourceStore getHashStore() { + return hashStore; + } + + public void setHashStore(ResourceStore hashStore) { + this.hashStore = hashStore; + } + + + public ResourceStore getStoreByPath(Path path) { + ResourceStore result = getResourceStores().stream() + .filter(store -> store.contains(path)) + .findFirst() + .orElse(null); + + return result; + } + + public Resource getInfo(Path path) { + Resource result = Optional.ofNullable(getStoreByPath(path)).map(store -> store.getInfo(path)) + .orElse(null); + return result; + } + + + public List readSymbolicLinkTransitive(Path absPath) { + + Set seen = new LinkedHashSet<>(); + + // tentative result list, may be set to null + // if the link resolves to a non-existent target + List result = new ArrayList<>(); + + while(Files.isSymbolicLink(absPath)) { + if(!Files.exists(absPath)) { + result = null; + break; + } + + if(seen.contains(absPath)) { + throw new RuntimeException("Cyclic symbolic link detected: " + seen); + } + seen.add(absPath); + + result.add(absPath); + + Path tmpPath; + try { + tmpPath = Files.readSymbolicLink(absPath); + } catch (IOException e) { + throw new RuntimeException(e); + } + absPath = absPath.getParent().resolve(tmpPath).normalize(); + } + + if(result != null) { + result.add(absPath); + Collections.reverse(result); + } + + return result; + } + + + // Assumes that there is at most 1 repository associated with a given path + public RdfHttpEntityFile getEntityForPath(Path path) { + List resolvedPaths = readSymbolicLinkTransitive(path); + Collection stores = getResourceStores(); + + RdfHttpEntityFile result = null; + + outer: for(Path resolvedPath : resolvedPaths) { + for(ResourceStore store : stores) { + result = store.getEntityForPath(resolvedPath); + if(result != null) { + break outer; + } + } + } + + return result; + } + + public Collection getEntities(String uri) { + Collection stores = getResourceStores(); + + Collection result = stores.stream() + .map(store -> store.getResource(uri)) + .flatMap(res -> res.getEntities().stream()) + //.flatMap(store -> store.listEntities(relPath).stream()) + .collect(Collectors.toList()); + return result; + } + + public static BasicHttpRequest createRequest(String url, String contentType, List encodings) { + 
BasicHttpRequest result = new BasicHttpRequest("GET", url); + result.setHeader(HttpHeaders.ACCEPT, contentType); + + List effectiveEncodings = new ArrayList<>(encodings); + if(!encodings.contains(IDENTITY_ENCODING) && !encodings.isEmpty()) { + effectiveEncodings.add("identity;q=0"); + } + + String encoding = effectiveEncodings.stream().collect(Collectors.joining(",")); + + result.setHeader(HttpHeaders.ACCEPT_ENCODING, encoding); + + return result; + } + + /** + * Convenience method for requesting a resource with given content type and encodingsS + * @param url + * @param contentType + * @param encodings + * @return + * @throws IOException + */ + public static RdfHttpEntityFile get(HttpResourceRepositoryFromFileSystem repo, String url, String contentType, List encodings) throws IOException { + BasicHttpRequest request = createRequest(url, contentType, encodings); + + RdfHttpEntityFile result = repo.get(request, HttpResourceRepositoryFromFileSystemImpl::resolveRequest); + return result; + } + + + public String bestEncoding(Collection encodings) { + PathCoderRegistry registry = PathCoderRegistry.get(); + String result = encodings.stream() + .filter(enc -> registry.getCoder(enc) != null) + .findFirst() + .orElse(null); + + return result; + } + + public MediaType bestContentType(Collection contentTypes) { + List supportedMediaTypes = HttpHeaderUtils.supportedMediaTypes(); + + MediaType result = contentTypes.stream() + .flatMap(range -> supportedMediaTypes.stream() + .filter(supportedMt -> supportedMt.is(range))) + .findFirst() + .orElse(null); + + return result; + } + + public static final String IDENTITY_ENCODING = "identity"; + + static class Plan { + protected Op op; + protected RdfEntityInfo info; + + public Plan(Op op, RdfEntityInfo info) { + super(); + this.op = op; + this.info = info; + } + + public Op getOp() { + return op; + } + + public RdfEntityInfo getInfo() { + return info; + } + } + + public Plan findBestPlanToServeRequest(HttpRequest request, + Collection entities, + OpExecutor opExecutor) throws IOException { + Header[] headers = request.getAllHeaders(); + + List supportedContentTypes = HttpHeaderUtils.supportedMediaTypes(); + Collection supportedEncodings = new ArrayList<>(Arrays.asList(IDENTITY_ENCODING)); + supportedEncodings.addAll(PathCoderRegistry.get().getCoderNames()); + + + // Get the requested content types in order of preference + Map requestedContentTypeRanges = HttpHeaderUtils.getOrderedValues(headers, HttpHeaders.ACCEPT).entrySet().stream() + .collect(Collectors.toMap(e -> MediaType.parse(e.getKey()), Entry::getValue)); + + // If no content type is requested then accept any + if (requestedContentTypeRanges.isEmpty()) { + for (RdfHttpEntity entity : entities) { + RdfEntityInfo info = entity.getCombinedInfo().as(RdfEntityInfo.class); + String mediaTypeStr = info.getContentType(); + if (mediaTypeStr != null) { + MediaType mediaType = MediaType.parse(mediaTypeStr); + requestedContentTypeRanges.put(mediaType, 1.0f); + } + } + } + + + // Get the requested encodings in order of preference + Map requestedEncodings = HttpHeaderUtils.getOrderedValues(headers, HttpHeaders.ACCEPT_ENCODING); + + if(!requestedEncodings.containsKey(IDENTITY_ENCODING)) { + requestedEncodings.put(IDENTITY_ENCODING, 1f); + } + + + // Filter the supported media types by the requested ones + // The supported media type must match a range in the headers whose score is greater than 0 + Map candidateTargetContentTypes = requestedContentTypeRanges.entrySet().stream() + .filter(rangeEntry -> 
rangeEntry.getValue() > 0) + .flatMap(rangeEntry -> supportedContentTypes.stream() + .filter(supported -> supported.is(rangeEntry.getKey())) + .map(supported -> Maps.immutableEntry(supported, rangeEntry.getValue()))) + .sorted((a, b) -> a.getValue().compareTo(b.getValue())) + .collect(Collectors.toMap(Entry::getKey, Entry::getValue, (a, b) -> a, LinkedHashMap::new)); + + Map candidateEncodings = requestedEncodings.entrySet().stream() + .filter(entry -> entry.getValue() > 0) + .flatMap(entry -> supportedEncodings.stream() + .filter(supported -> supported.equalsIgnoreCase(entry.getKey())) + .map(supported -> Maps.immutableEntry(supported, entry.getValue()))) + .sorted((a, b) -> a.getValue().compareTo(b.getValue())) + .collect(Collectors.toMap(Entry::getKey, Entry::getValue, (a, b) -> a, LinkedHashMap::new)); + + + // TODO Abstract the cartesian product so we can extend to any number of dimensions + //Table entityToPlanToScore = HashBasedTable.create(); + Multimap> entityToPlan = HashMultimap.create(); + + for(RdfHttpEntityFile entity : entities) { + // TODO Ensure entities are valid + // - e.g. manual deletion of files in the http cache can cause corruption + + RdfEntityInfo info = entity.getCombinedInfo().as(RdfEntityInfo.class); + //MediaType mt = MediaType.parse(info.getContentType()); + + for(Entry e : candidateTargetContentTypes.entrySet()) { + String tgtContentType = e.getKey().toString(); + Float tgtContentTypeScore = e.getValue(); + + for(Entry f : candidateEncodings.entrySet()) { + String tgtEncoding = f.getKey(); + Float tgtEncodingScore = f.getValue(); + + List tgtEncodings = tgtEncoding.equalsIgnoreCase(IDENTITY_ENCODING) + ? Collections.emptyList() + : Arrays.asList(tgtEncoding); + + Op op = Planner.createPlan(entity, tgtContentType, tgtEncodings); + if(op != null) { + op = opExecutor.optimizeInPlace(op); + + + int numOps = ResourceTreeUtils.getNumOps(op, Op::getChildren); + + RdfEntityInfo meta = ModelFactory.createDefaultModel().createResource() + .as(RdfEntityInfo.class) + .setContentType(tgtContentType) + .setContentEncodings(tgtEncodings); + + Plan plan = new Plan(op, meta); + + Entry planAndScore = Maps.immutableEntry(plan, (float)numOps); + + entityToPlan.put(entity, planAndScore); + } + } + } + } + + Entry> entry = entityToPlan.entries().stream() + .sorted((a, b) -> a.getValue().getValue().compareTo(b.getValue().getValue())) + .findFirst() + .orElse(null); + + Plan result = entry == null ? 
null : entry.getValue().getKey(); + + return result; + + //Map candidateToScore = new HashMap<>(); + + + // TODO Find best candidate among the file entities + +// // // Pick entity with the best score // RdfHttpEntityFile entity = entityToScore.entrySet().stream() // .sorted((a, b) -> a.getValue().compareTo(b.getValue())) @@ -396,97 +417,97 @@ public Plan findBestPlanToServeRequest(HttpRequest request, // .map(Entry::getKey) // .orElse(null); - } - - - public static HttpRequest expandHttpRequest(HttpRequest request) { - HttpUriRequest result = - RequestBuilder - .copy(request) - .build(); - - Header[] origHeaders = result.getAllHeaders(); - Header[] newHeaders = ContentTypeUtils.expandAccept(origHeaders); - - // TODO Add encoding - - result.setHeaders(newHeaders); - - return result; - } - - public boolean validateEntity(RdfHttpEntityFile entity) { - Path path = entity.getAbsolutePath(); - boolean result = Files.exists(path); - - return result; - } - - /** - * Lookup an entity - * - * First, this method checks if the request can be served from the locally cached entities: - * It attempts to create a plan that transforms the available entities into a requested one. - * If this fails, this method examines the cached resource vary headers for whether fetching a remote entity - * can help to serve the request. If there is no cached resource, - * a request to the remote server is made with expanded headers. - * - * If this leads to a new entity being generated, then the process of planning is repeated with it. - * - * - * TODO I thought there was a way to enumerate in the HTTP headers for which values exists for Vary - * By default Vary only says that a request with a different value for the given header name may yield a different representation - * - * - * @param request - * @param httpRequester - * @return - * @throws IOException - */ - @Override - public RdfHttpEntityFile get(HttpRequest request, Function> httpRequester) throws IOException { - // Expand the request: Add compatible accept headers and encodings - - String uri = request.getRequestLine().getUri(); - - + } + + + public static HttpRequest expandHttpRequest(HttpRequest request) { + HttpUriRequest result = + RequestBuilder + .copy(request) + .build(); + + Header[] origHeaders = result.getAllHeaders(); + Header[] newHeaders = ContentTypeUtils.expandAccept(origHeaders); + + // TODO Add encoding + + result.setHeaders(newHeaders); + + return result; + } + + public boolean validateEntity(RdfHttpEntityFile entity) { + Path path = entity.getAbsolutePath(); + boolean result = Files.exists(path); + + return result; + } + + /** + * Lookup an entity + * + * First, this method checks if the request can be served from the locally cached entities: + * It attempts to create a plan that transforms the available entities into a requested one. + * If this fails, this method examines the cached resource vary headers for whether fetching a remote entity + * can help to serve the request. If there is no cached resource, + * a request to the remote server is made with expanded headers. + * + * If this leads to a new entity being generated, then the process of planning is repeated with it. 
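Interrupting the javadoc briefly: the lookup it describes is usually reached through the static convenience overload shown earlier in this class, which wires in resolveRequest as the fallback requester; the instance-level get below also tolerates a null requester, in which case only locally cached entities are considered (per its null check). A usage sketch, exception handling omitted:

    import java.nio.file.Paths;
    import java.util.Arrays;
    import org.aksw.jena_sparql_api.http.repository.api.RdfHttpEntityFile;
    import org.aksw.jena_sparql_api.http.repository.impl.HttpResourceRepositoryFromFileSystemImpl;

    HttpResourceRepositoryFromFileSystemImpl repo =
            HttpResourceRepositoryFromFileSystemImpl.create(Paths.get("/tmp/http-repo"));

    // Served from downloads/cache if a conversion plan exists; fetched and stored otherwise
    RdfHttpEntityFile entity = HttpResourceRepositoryFromFileSystemImpl.get(
            repo, "http://example.org/data.nt", "application/n-triples", Arrays.asList("gzip"));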
+ * + * TODO I thought there was a way to enumerate in the HTTP headers which values exist for Vary + * By default Vary only says that a request with a different value for the given header name may yield a different representation + * + * + * @param request + * @param httpRequester + * @return + * @throws IOException + */ + @Override + public RdfHttpEntityFile get(HttpRequest request, Function<HttpRequest, Entry<HttpRequest, HttpResponse>> httpRequester) throws IOException { + // Expand the request: Add compatible accept headers and encodings + + String uri = request.getRequestLine().getUri(); + + // if(uri.contains("db3fc357b775f2e996f88c87ddfacc64/db3fc357b775f2e996f88c87ddfacc64.hdt")) { // System.out.println("DEBUG POINT"); // } - - //RdfHttpResourceFile res = store.get(uri); - - - Collection entities = getEntities(uri); - List validatedEntities = entities.stream() - .filter(this::validateEntity) - .collect(Collectors.toList()); - - OpExecutor opExecutor = new OpExecutor(this, hashStore); - - Plan plan = findBestPlanToServeRequest(request, validatedEntities, opExecutor); - - //result = null; - if(plan == null) { - RdfHttpResourceFile res = downloadStore.getResource(uri); - HttpRequest newRequest = expandHttpRequest(request); - - if(httpRequester != null) { - Entry response = httpRequester.apply(newRequest); - - - RdfHttpEntityFile entity = saveResponse(res, response.getKey(), response.getValue()); - - - // Validation step; the entity should match the - plan = findBestPlanToServeRequest(request, Collections.singleton(entity), opExecutor); - } - } - - if(plan == null) { - return null; - //throw new RuntimeException("Could not create a plan for how to serve an HTTP request"); - } + + //RdfHttpResourceFile res = store.get(uri); + + + Collection<RdfHttpEntityFile> entities = getEntities(uri); + List<RdfHttpEntityFile> validatedEntities = entities.stream() + .filter(this::validateEntity) + .collect(Collectors.toList()); + + OpExecutor opExecutor = new OpExecutor(this, hashStore); + + Plan plan = findBestPlanToServeRequest(request, validatedEntities, opExecutor); + + //result = null; + if(plan == null) { + RdfHttpResourceFile res = downloadStore.getResource(uri); + HttpRequest newRequest = expandHttpRequest(request); + + if(httpRequester != null) { + Entry<HttpRequest, HttpResponse> response = httpRequester.apply(newRequest); + + + RdfHttpEntityFile entity = saveResponse(res, response.getKey(), response.getValue()); + + + // Validation step; the entity should match the + plan = findBestPlanToServeRequest(request, Collections.singleton(entity), opExecutor); + } + } + + if(plan == null) { + return null; + //throw new RuntimeException("Could not create a plan for how to serve an HTTP request"); + } // // Convert the entity to the request // String bestEncoding = bestEncoding(encodings.keySet()); @@ -500,271 +521,271 @@ public RdfHttpEntityFile get(HttpRequest request, Function { - Checksum hi = info.getModel().createResource().as(Checksum.class); - - hi.setAlgorithm("sha256").setChecksum(str); - Collection hashes = info.as(RdfEntityInfo.class).getHashes(); - hashes.add(hi); - }); - - } catch(Exception e) { - throw new RuntimeException(e); - } - - } - - /** - * Derives the suffix which to append to the base path from the entity's headers.
- * - * @param basePath - * @param entity - * @throws IOException - * @throws UnsupportedOperationException - */ - public RdfHttpEntityFile saveResponse(RdfHttpResourceFile targetResource, HttpRequest request, HttpResponse response) throws UnsupportedOperationException, IOException { - HttpEntity entity = response.getEntity(); - - // If the type is application/octet-steam we - // can try to derive content type and encodings from - // a content-disposition header or the original URI - // In fact, we can try both things, and see whether any yields results - // the results can be verified afterwards (e.g. by Files.probeContentType) - // hm, since content-disposition seems to be non-standard maybe we can also just ignore it - - String ct = HttpHeaderUtils.getValueOrNull(entity.getContentType()); - - // TODO Move the logic to derive the headers we want elsewhere - // E.g. apache2 may return gzip files as content type instead of encoding - RdfEntityInfo meta = HttpHeaderUtils.copyMetaData(entity, null); - String uri = request.getRequestLine().getUri(); - if(ct == null - || ct.equalsIgnoreCase(ContentType.APPLICATION_OCTET_STREAM.getMimeType()) - || ct.equalsIgnoreCase(ContentType.TEXT_PLAIN.getMimeType()) - || ct.equalsIgnoreCase(ContentType.parse("application/x-gzip").getMimeType()) + + Path tgt = op.accept(opExecutor); + + RdfHttpEntityFile entity; + + Path hashPath = hashStore.getAbsolutePath(); + // If the path points to the hash store, copy the result to the resources' cache + if(tgt.startsWith(hashPath)) { + if(!Files.isSymbolicLink(tgt)) { + + RdfEntityInfo meta = plan.getInfo(); + + entity = cacheStore.allocateEntity(uri, meta); + Path tgtPath = entity.getAbsolutePath(); + + forceCreateDirectories(tgtPath.getParent()); + + // HACK - Replace existing should not be needed + try { + Files.move(tgt, tgtPath /*, StandardCopyOption.REPLACE_EXISTING */); /*, StandardCopyOption.ATOMIC_MOVE */ + } + catch(Exception e) { + logger.warn("Should not happen: Failed move " + tgt + " to " + tgtPath, e); + } + // Note: It is important that we relativize based on the target file's directory, + // hence tgt.getParent() + Path relTgtPath = tgt.getParent().relativize(tgtPath); + + Files.createSymbolicLink(tgt, relTgtPath); + + entity = cacheStore.getEntityForPath(tgtPath); + + computeHashForEntity(entity, null); + } else { + Path relPathTgt = Files.readSymbolicLink(tgt); + Path absPath = tgt.getParent().resolve(relPathTgt).normalize(); + entity = getEntityForPath(absPath); + } + + } else { + entity = getEntityForPath(tgt); + } + + + + return entity; + } + + + public static void forceCreateFile(Path path) { + try { + Files.createFile(path); + } catch (FileAlreadyExistsException e) { + // Ignored + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public static void forceCreateDirectories(Path path) { + try { + Files.createDirectories(path); + } catch (FileAlreadyExistsException e) { + // Ignored + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + + /** + * Compute a hash (sha256) for the content at path tmp and associate it with the given + * entity. If tmp is null, use the content of the entity. 
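+ *
+ * <p>A minimal sketch of the hashing step performed below; it assumes plain
+ * (unshaded) Guava on the classpath, whereas the implementation uses Jena's
+ * shaded copy:
+ * <pre>{@code
+ * ByteSource bs = com.google.common.io.Files.asByteSource(path.toFile());
+ * String sha256Hex = bs.hash(Hashing.sha256()).toString();
+ * }</pre>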
+ * + * @param rdfEntity + * @param tmp + */ + public static void computeHashForEntity(RdfHttpEntityFile rdfEntity, Path tmp) { + Path targetPath = rdfEntity.getAbsolutePath(); + + if(tmp == null) { + tmp = targetPath; + } + + + // Compute hash + ByteSource bs = org.apache.jena.ext.com.google.common.io.Files.asByteSource(tmp.toFile()); + + HashCode hashCode; + try { + hashCode = bs.hash(Hashing.sha256()); + String str = hashCode.toString(); + + forceCreateFile(targetPath); + + rdfEntity.updateInfo(info -> { + Checksum hi = info.getModel().createResource().as(Checksum.class); + + hi.setAlgorithm("sha256").setChecksum(str); + Collection<Checksum> hashes = info.as(RdfEntityInfo.class).getHashes(); + hashes.add(hi); + }); + + } catch(Exception e) { + throw new RuntimeException(e); + } + + } + + /** + * Derives the suffix to append to the base path from the entity's headers. + * + * @param basePath + * @param entity + * @throws IOException + * @throws UnsupportedOperationException + */ + public RdfHttpEntityFile saveResponse(RdfHttpResourceFile targetResource, HttpRequest request, HttpResponse response) throws UnsupportedOperationException, IOException { + HttpEntity entity = response.getEntity(); + + // If the type is application/octet-stream we + // can try to derive content type and encodings from + // a content-disposition header or the original URI + // In fact, we can try both things, and see whether any yields results + // the results can be verified afterwards (e.g. by Files.probeContentType) + // hm, since content-disposition seems to be non-standard maybe we can also just ignore it + + String ct = HttpHeaderUtils.getValueOrNull(entity.getContentType()); + + // TODO Move the logic to derive the headers we want elsewhere + // E.g. apache2 may return gzip files as content type instead of encoding + RdfEntityInfo meta = HttpHeaderUtils.copyMetaData(entity, null); + String uri = request.getRequestLine().getUri(); + if(ct == null + || ct.equalsIgnoreCase(ContentType.APPLICATION_OCTET_STREAM.getMimeType()) + || ct.equalsIgnoreCase(ContentType.TEXT_PLAIN.getMimeType()) + || ct.equalsIgnoreCase(ContentType.parse("application/x-gzip").getMimeType()) // || ct.equalsIgnoreCase(ContentType.parse("application/x-bzip").getMimeType()) - || ct.equalsIgnoreCase(ContentType.parse("application/x-bzip2").getMimeType()) - ) { - meta = ContentTypeUtils.deriveHeadersFromFileExtension(uri); - } - - RdfHttpEntityFile rdfEntity = targetResource.allocate(meta); - - //Path targetPath = res.getPath().resolve("data"); - Path targetPath = rdfEntity.getAbsolutePath(); - - // HACK - this assumes the target path refers to a file (and not a directory)!
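// A minimal sketch, with assumed names, of the write-to-a-temp-file-then-move
// pattern that saveResponse uses: the payload is streamed to a sibling temp
// file first and only then moved over the target, so readers never observe a
// partially written entity. Plain java.nio is used here instead of the
// FileUtils.allocateTmpFile helper from this module.
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.AtomicMoveNotSupportedException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

class TempFileWriteSketch {
    static Path writeViaTempFile(Path target, InputStream content) throws IOException {
        Files.createDirectories(target.getParent());
        // Allocate the temp file next to the target so the final move stays on one file system
        Path tmp = Files.createTempFile(target.getParent(), target.getFileName().toString(), ".tmp");
        Files.copy(content, tmp, StandardCopyOption.REPLACE_EXISTING);
        try {
            // Prefer an atomic move; fall back to a plain replace where unsupported
            Files.move(tmp, target, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
        } catch (AtomicMoveNotSupportedException e) {
            Files.move(tmp, target, StandardCopyOption.REPLACE_EXISTING);
        }
        return target;
    }
}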
- Files.createDirectories(targetPath.getParent()); - - Path tmp = FileUtils.allocateTmpFile(targetPath); - Files.copy(entity.getContent(), tmp, StandardCopyOption.REPLACE_EXISTING); - - // Issue: computeHashForEntity requires the entity file to exist and thus creates a zero byte file - // However, Files.move will then cause a file already exists exception - computeHashForEntity(rdfEntity, tmp); - - logger.info("For url " + uri + " moving file " + tmp + " to " + targetPath); - Files.move(tmp, targetPath, StandardCopyOption.REPLACE_EXISTING /*, StandardCopyOption.ATOMIC_MOVE */); - - //RdfFileEntity result = new RdfFileEntityImpl(finalPath, meta); + || ct.equalsIgnoreCase(ContentType.parse("application/x-bzip2").getMimeType()) + ) { + meta = ContentTypeUtils.deriveHeadersFromFileExtension(uri); + } + + RdfHttpEntityFile rdfEntity = targetResource.allocate(meta); + + //Path targetPath = res.getPath().resolve("data"); + Path targetPath = rdfEntity.getAbsolutePath(); + + // HACK - this assumes the target path refers to a file (and not a directory)! + Files.createDirectories(targetPath.getParent()); + + Path tmp = FileUtils.allocateTmpFile(targetPath); + Files.copy(entity.getContent(), tmp, StandardCopyOption.REPLACE_EXISTING); + + // Issue: computeHashForEntity requires the entity file to exist and thus creates a zero byte file + // However, Files.move will then cause a file already exists exception + computeHashForEntity(rdfEntity, tmp); + + logger.info("For url " + uri + " moving file " + tmp + " to " + targetPath); + Files.move(tmp, targetPath, StandardCopyOption.REPLACE_EXISTING /*, StandardCopyOption.ATOMIC_MOVE */); + + //RdfFileEntity result = new RdfFileEntityImpl(finalPath, meta); // result.setContentType(meta.getContentType()); // result.setContentEncoding(meta.getContentEncoding()); - - return rdfEntity; - } - - /** - * May rewrite an original request and returns it together with its response - * - * @param request - * @return - */ - public static Entry resolveRequest(HttpRequest request) { - String url = request.getRequestLine().getUri(); + + return rdfEntity; + } + + /** + * May rewrite an original request and returns it together with its response + * + * @param request + * @return + */ + public static Entry resolveRequest(HttpRequest request) { + String url = request.getRequestLine().getUri(); // // // Extract a dataset id from the URI // // Check all data catalogs for whether they can resolve the id -// +// // // Fake a request to a catalog for now - the result is a dcat model // Model m = RDFDataMgr.loadModel("/home/raven/.dcat/repository/datasets/data/www.example.org/dataset-dbpedia-2016-10-core/_content/dcat.ttl"); -// +// // //System.out.println(m.size()); -// +// // String url = "http://downloads.dbpedia.org/2016-10/core-i18n/en/genders_en.ttl.bz2"; // //String url = m.listObjectsOfProperty(DCAT.downloadURL).mapWith(x -> x.asNode().getURI()).next(); // System.out.println(url); - - HttpClient client = HttpClientBuilder.create().build(); - - - HttpUriRequest myRequest = - RequestBuilder - .copy(request) - .setUri(url) - .build(); - - //new DefaultHttpRequestFactory(). 
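// A minimal sketch, with assumed names, restating the request execution in
// resolveRequest with the closeable HttpClient API (Apache HttpClient 4.3+,
// which this module already uses), so that both the client and the response
// are released deterministically.
import java.io.IOException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;

class HttpExecSketch {
    static int fetchStatus(String url) throws IOException {
        try (CloseableHttpClient client = HttpClientBuilder.create().build();
             CloseableHttpResponse response = client.execute(new HttpGet(url))) {
            // Closing the response also releases the underlying entity stream
            return response.getStatusLine().getStatusCode();
        }
    }
}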
- HttpResponse response; - try { - response = client.execute(myRequest); - } catch(Exception e) { - throw new RuntimeException(e); - } - //client.execute(request, context) - - //m.listObjectsOfProperty(DCAT.downloadURL).toList(); - - //throw new RuntimeException("not implemented yet"); - return Maps.immutableEntry(myRequest, response); - } - - public static Path getDefaultPath() { - String homeDir = StandardSystemProperty.USER_HOME.value(); - Path result = Paths.get(homeDir).resolve(".dcat/repository"); - - return result; - } - public static HttpResourceRepositoryFromFileSystemImpl createDefault() throws IOException { - Path root = getDefaultPath(); - Files.createDirectories(root); - - HttpResourceRepositoryFromFileSystemImpl result = create(root); - - return result; - } - - public static void main(String[] args) throws IOException { - JenaSystem.init(); - - - Header[] expansionTest = new Header[] { new BasicHeader(HttpHeaders.ACCEPT, WebContent.contentTypeTurtleAlt2 + ";q=0.3")}; + + HttpClient client = HttpClientBuilder.create().build(); + + + HttpUriRequest myRequest = + RequestBuilder + .copy(request) + .setUri(url) + .build(); + + //new DefaultHttpRequestFactory(). + HttpResponse response; + try { + response = client.execute(myRequest); + } catch(Exception e) { + throw new RuntimeException(e); + } + //client.execute(request, context) + + //m.listObjectsOfProperty(DCAT.downloadURL).toList(); + + //throw new RuntimeException("not implemented yet"); + return Maps.immutableEntry(myRequest, response); + } + + public static Path getDefaultPath() { + String homeDir = StandardSystemProperty.USER_HOME.value(); + Path result = Paths.get(homeDir).resolve(".dcat/repository"); + + return result; + } + public static HttpResourceRepositoryFromFileSystemImpl createDefault() throws IOException { + Path root = getDefaultPath(); + Files.createDirectories(root); + + HttpResourceRepositoryFromFileSystemImpl result = create(root); + + return result; + } + + public static void main(String[] args) throws IOException { + JenaSystem.init(); + + + Header[] expansionTest = new Header[] { new BasicHeader(HttpHeaders.ACCEPT, WebContent.contentTypeTurtleAlt2 + ";q=0.3")}; // Header[] expansionTest = new Header[] { new BasicHeader(HttpHeaders.ACCEPT, WebContent.contentTypeTurtleAlt2 + ",text/plain;q=0.5")}; - expansionTest = ContentTypeUtils.expandAccept(expansionTest); - System.out.println("Expanded: " + Arrays.asList(expansionTest)); - + expansionTest = ContentTypeUtils.expandAccept(expansionTest); + System.out.println("Expanded: " + Arrays.asList(expansionTest)); + // if(true) { // return; // } - - Path root = Paths.get("/home/raven/.dcat/test3"); - Files.createDirectories(root); - HttpResourceRepositoryFromFileSystemImpl manager = create(root); + Path root = Paths.get("/home/raven/.dcat/test3"); + Files.createDirectories(root); + + HttpResourceRepositoryFromFileSystemImpl manager = create(root); + + ResourceStore store = manager.getDownloadStore(); + ResourceStore hashStore = manager.getHashStore(); + - ResourceStore store = manager.getDownloadStore(); - ResourceStore hashStore = manager.getHashStore(); - - // String url = "/home/raven/.dcat/test3/genders_en.ttl.bz2"; - String url = "http://downloads.dbpedia.org/2016-10/core-i18n/en/genders_en.ttl.bz2"; + String url = "http://downloads.dbpedia.org/2016-10/core-i18n/en/genders_en.ttl.bz2"; // String url = "/home/raven/Projects/limbo/git/train_3-dataset/target/metadata-catalog/catalog.all.ttl"; // Model m = RDFDataMgr.loadModel(url); // try(RDFConnection conn = 
RDFConnectionFactory.connect(DatasetFactory.wrap(m))) { @@ -774,30 +795,30 @@ public static void main(String[] args) throws IOException { // } // } // } - - RdfHttpEntityFile entity = HttpResourceRepositoryFromFileSystemImpl - .get(manager, url, WebContent.contentTypeRDFXML, Arrays.asList("bzip2")); - //RdfHttpEntityFile entity = manager.get(url, WebContent.contentTypeTurtle, Arrays.asList("gzip")); - - //RdfHttpResourceFile res = store.getResource(url); - //RdfHttpEntityFile entity = res.getEntities().iterator().next(); - - //Planner.execute(op); - - - if(true) { - return; - } - + + RdfHttpEntityFile entity = HttpResourceRepositoryFromFileSystemImpl + .get(manager, url, WebContent.contentTypeRDFXML, Arrays.asList("bzip2")); + //RdfHttpEntityFile entity = manager.get(url, WebContent.contentTypeTurtle, Arrays.asList("gzip")); + + //RdfHttpResourceFile res = store.getResource(url); + //RdfHttpEntityFile entity = res.getEntities().iterator().next(); + + //Planner.execute(op); + + + if(true) { + return; + } + // RdfFileResource res = rm.get("http://downloads.dbpedia.org/2016-10/core-i18n/en/genders_en.ttl.bz2"); - - - BasicHttpRequest r = new BasicHttpRequest("GET", url); - r.setHeader(HttpHeaders.ACCEPT, WebContent.contentTypeTurtleAlt2); - r.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip,identity;q=0"); - manager.get(r, HttpResourceRepositoryFromFileSystemImpl::resolveRequest); - - } + + BasicHttpRequest r = new BasicHttpRequest("GET", url); + r.setHeader(HttpHeaders.ACCEPT, WebContent.contentTypeTurtleAlt2); + r.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip,identity;q=0"); + + manager.get(r, HttpResourceRepositoryFromFileSystemImpl::resolveRequest); + + } } diff --git a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/impl/UriToPathUtils.java b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/impl/UriToPathUtils.java index 6b0da39f5..7be33fa27 100644 --- a/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/impl/UriToPathUtils.java +++ b/jena-sparql-api-conjure/src/main/java/org/aksw/jena_sparql_api/http/repository/impl/UriToPathUtils.java @@ -3,59 +3,82 @@ import java.net.URI; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Arrays; import java.util.Optional; +import java.util.stream.Collectors; import org.aksw.commons.util.strings.StringUtils; +import org.apache.commons.lang3.ArrayUtils; public class UriToPathUtils { - - /** - * Default mapping of URIs to relative paths - * - * scheme://host:port/path?query becomes - * host/port/path/query - * - * @param uri - * @return - */ - public static Path resolvePath(URI uri) { - String a = Optional.ofNullable(uri.getHost()).orElse(""); - String b = uri.getPort() == -1 ? "" : Integer.toString(uri.getPort()); - - // Replace ~ (tilde) with _ because otherwise jena IRI validation will fail - // on file:// urls with SCHEME_PATTERN_MATCH_FAILED - // Tilde is common symbol with e.g. the Apache Web server's userdir mod - String pathStr = Optional.ofNullable(uri.getPath()).orElse("") - .replaceAll("~", "_"); - - Path result = Paths.get(".") - .resolve(a) - .resolve(b) - .resolve((a.isEmpty() && b.isEmpty() ? "" : ".") + pathStr) - .resolve(Optional.ofNullable(uri.getQuery()).orElse("")) - .normalize(); - - return result; - } - - public static Path resolvePath(String uri) { - URI u = URIUtils.newURI(uri); - - Path tmp = u == null ? - Paths.get(StringUtils.urlEncode(uri)) - : UriToPathUtils.resolvePath(u); - - // Make absolute paths relative (i.e. 
remove leading slashes) - Path result; - if(tmp.isAbsolute()) { - Path root = tmp.getRoot(); - result = root.relativize(tmp); - } else { - result = tmp; - } - - //logger.info("Resolved: " + uri + "\n to: " + result + "\n via: " + u); - return result; - } + + public static String hostNameToPath(String hostName) { + String[] parts = hostName.split("\\."); + ArrayUtils.reverse(parts); + + String result = Arrays.asList(parts).stream().collect(Collectors.joining("/")); +// Path result = Paths.get(str); + return result; + } + + /** + * Default mapping of URIs to relative paths + * + * scheme://host:port/path?query becomes + * host/port/path/query + * + * If hostNameToPath is enabled, the host name labels are additionally reversed, + * e.g. www.example.org becomes org/example/www + * + * @param uri + * @return + */ + public static Path resolvePath(URI uri) { + return resolvePath(uri, true); + } + + public static Path resolvePath(URI uri, boolean hostNameToPath) { + String a = Optional.ofNullable(uri.getHost()) + .map(str -> hostNameToPath ? hostNameToPath(str) : str) + .orElse(""); + + String b = uri.getPort() == -1 ? "" : Integer.toString(uri.getPort()); + + // Replace ~ (tilde) with _ because otherwise jena IRI validation will fail + // on file:// urls with SCHEME_PATTERN_MATCH_FAILED + // Tilde is a common symbol with e.g. the Apache Web server's userdir mod + String pathStr = Optional.ofNullable(uri.getPath()).orElse("") + .replaceAll("~", "_"); + + Path result = Paths.get(".") + .resolve(a) + .resolve(b) + .resolve((a.isEmpty() && b.isEmpty() ? "" : ".") + pathStr) + .resolve(Optional.ofNullable(uri.getQuery()).orElse("")) + .normalize(); + + return result; + } + + public static Path resolvePath(String uri) { + return resolvePath(uri, true); + } + + public static Path resolvePath(String uri, boolean hostNameToPath) { + URI u = URIUtils.newURI(uri); + + Path tmp = u == null ? + Paths.get(StringUtils.urlEncode(uri)) + : UriToPathUtils.resolvePath(u, hostNameToPath); + + // Make absolute paths relative (i.e.
remove leading slashes) + Path result; + if(tmp.isAbsolute()) { + Path root = tmp.getRoot(); + result = root.relativize(tmp); + } else { + result = tmp; + } + + //logger.info("Resolved: " + uri + "\n to: " + result + "\n via: " + u); + return result; + } } diff --git a/jena-sparql-api-conjure/src/test/java/org/aksw/jena_sparql_api/conjure/test/MainConjurePlayground.java b/jena-sparql-api-conjure/src/test/java/org/aksw/jena_sparql_api/conjure/test/MainConjurePlayground.java index c50b1c72d..8a7561e65 100644 --- a/jena-sparql-api-conjure/src/test/java/org/aksw/jena_sparql_api/conjure/test/MainConjurePlayground.java +++ b/jena-sparql-api-conjure/src/test/java/org/aksw/jena_sparql_api/conjure/test/MainConjurePlayground.java @@ -131,7 +131,7 @@ public static void main(String[] args) throws Exception { System.out.println("Op Vars: " + job.getOpVars()); System.out.println("Literal Vars: " + job.getDeclaredVars()); - JobInstance ji = JobUtils.createJobInstance(job, env, map); + JobInstance ji = JobUtils.createJobInstanceWithCopy(job, env, map); System.out.println("EnvMap: " + ji.getEnvMap()); System.out.println("OpMap: " + ji.getOpVarMap()); diff --git a/jena-sparql-api-core/pom.xml b/jena-sparql-api-core/pom.xml index e97f8d291..2309ab3fd 100644 --- a/jena-sparql-api-core/pom.xml +++ b/jena-sparql-api-core/pom.xml @@ -13,7 +13,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionFactoryEx.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionFactoryEx.java deleted file mode 100644 index 8f4d7ec6a..000000000 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionFactoryEx.java +++ /dev/null @@ -1,287 +0,0 @@ -package org.aksw.jena_sparql_api.core; - -import java.lang.reflect.Field; -import java.util.function.Consumer; -import java.util.function.Function; - -import org.aksw.jena_sparql_api.core.connection.QueryExecutionFactorySparqlQueryConnection; -import org.aksw.jena_sparql_api.core.connection.SparqlQueryConnectionJsa; -import org.aksw.jena_sparql_api.stmt.SparqlStmt; -import org.aksw.jena_sparql_api.utils.Symbols; -import org.apache.jena.query.Dataset; -import org.apache.jena.query.Query; -import org.apache.jena.query.QueryExecution; -import org.apache.jena.rdf.model.ModelFactory; -import org.apache.jena.rdf.model.Resource; -import org.apache.jena.rdfconnection.RDFConnection; -import org.apache.jena.rdfconnection.RDFConnectionFactory; -import org.apache.jena.rdfconnection.RDFConnectionLocal; -import org.apache.jena.rdfconnection.RDFConnectionModular; -import org.apache.jena.rdfconnection.SparqlQueryConnection; -import org.apache.jena.rdfconnection.SparqlUpdateConnection; -import org.apache.jena.riot.WebContent; -import org.apache.jena.sparql.core.DatasetDescription; -import org.apache.jena.sparql.engine.http.QueryEngineHTTP; -import org.apache.jena.sparql.util.Context; -import org.apache.jena.update.UpdateExecutionFactory; -import org.apache.jena.update.UpdateProcessor; -import org.apache.jena.update.UpdateRequest; - -public class RDFConnectionFactoryEx { - - // TODO Consider move to a better place - e.g. 
RDFConnectionFactoryEx - public static RDFConnectionEx connect(String serviceUrl, DatasetDescription datasetDescription) { - SparqlServiceReference ssr = new SparqlServiceReference(serviceUrl, datasetDescription); - RDFConnectionEx result = connect(ssr); - return result; - } - - public static RDFConnectionEx connect(SparqlServiceReference ssr) { - String serviceUrl = ssr.getServiceURL(); - RDFConnection rawConn = RDFConnectionFactory.connect(serviceUrl); - DatasetDescription datasetDescription = ssr.getDatasetDescription(); - RDFConnection core = wrapWithDatasetAndXmlContentType(rawConn, datasetDescription); - - RDFConnectionMetaData metadata = ModelFactory.createDefaultModel() - .createResource().as(RDFConnectionMetaData.class); - - metadata.setServiceURL(ssr.getServiceURL()); - metadata.getDefaultGraphs().addAll(ssr.getDefaultGraphURIs()); - metadata.getNamedGraphs().addAll(ssr.getNamedGraphURIs()); - - RDFConnectionEx result = new RDFConnectionExImpl(core, metadata); - - return result; - } - - /** - * Wrap a connection with one that provides metadata. - * If the given metadata is null, an empty blank node will be created. - * - * @param rawConn - * @param metadata - * @return - */ - public static RDFConnectionEx wrap(RDFConnection rawConn, Resource metadata) { - if(metadata == null) { - metadata = ModelFactory.createDefaultModel().createResource(); - } - - RDFConnectionMetaData md = metadata.as(RDFConnectionMetaData.class); - - RDFConnectionEx result = new RDFConnectionExImpl(rawConn, md); - return result; - } - - - public static RDFConnection wrapWithQueryTransform(RDFConnection conn, Function fn) { - RDFConnection result = - new RDFConnectionModular(new SparqlQueryConnectionJsa( - FluentQueryExecutionFactory - .from(new QueryExecutionFactorySparqlQueryConnection(conn)) - .config() - .withQueryTransform(fn) - .end() - .create() - ), conn, conn); - - return result; - } - - public static RDFConnection wrapWithDatasetAndXmlContentType(RDFConnection rawConn, DatasetDescription datasetDescription) { - RDFConnection result = - new RDFConnectionModular(new SparqlQueryConnectionJsa( - FluentQueryExecutionFactory - .from(new QueryExecutionFactorySparqlQueryConnection(rawConn)) - .config() - //.withClientSideConstruct() - .withDatasetDescription(datasetDescription) - .withPostProcessor(qe -> { - if(qe instanceof QueryEngineHTTP) { - QueryEngineHTTP qeh = (QueryEngineHTTP)qe; - qeh.setSelectContentType(WebContent.contentTypeResultsXML); - qeh.setModelContentType(WebContent.contentTypeNTriples); - qeh.setDatasetContentType(WebContent.contentTypeNQuads); - } - }) - .end() - .create() - ), rawConn, rawConn); - - - return result; - } - -// public static final Symbol symConnection = Symbol.create("http://jsa.aksw.org/connection"); - - - public static RDFConnection wrapWithQueryParser(RDFConnection rawConn, Function parser) { - RDFConnection result = - new RDFConnectionModular(rawConn, rawConn, rawConn) { - @Override - public QueryExecution query(String queryString) { - SparqlStmt stmt = parser.apply(queryString); - Query query = stmt.getAsQueryStmt().getQuery(); - QueryExecution result = query(query); - return result; - } - - - @Override - public void update(String updateString) { - SparqlStmt stmt = parser.apply(updateString); - UpdateRequest updateRequest = stmt.getAsUpdateStmt().getUpdateRequest(); - update(updateRequest); - //QueryExecution result = query(query); - } - }; - -// new SparqlQueryConnectionJsa( -// FluentQueryExecutionFactory -// .from(new 
QueryExecutionFactorySparqlQueryConnection(rawConn)) -// .config() -// .withParser(parser) -// .end() -// .create() -// ), rawConn, rawConn); - - - return result; - } - - - public static SparqlQueryConnection getQueryConnection(RDFConnectionModular conn) { - SparqlQueryConnection result; - try { - Field f = RDFConnectionModular.class.getDeclaredField("queryConnection"); - f.setAccessible(true); - result = (SparqlQueryConnection)f.get(conn); - } catch(Exception e) { - throw new RuntimeException(e); - } - - return result; - } - - public static SparqlUpdateConnection getUpdateConnection(RDFConnectionModular conn) { - SparqlUpdateConnection result; - try { - Field f = RDFConnectionModular.class.getDeclaredField("updateConnection"); - f.setAccessible(true); - result = (SparqlUpdateConnection)f.get(conn); - } catch(Exception e) { - throw new RuntimeException(e); - } - - return result; - } - - public static Dataset getDataset(RDFConnectionLocal conn) { - Dataset result; - try { - Field f = RDFConnectionLocal.class.getDeclaredField("dataset"); - f.setAccessible(true); - result = (Dataset)f.get(conn); - } catch(Exception e) { - throw new RuntimeException(e); - } - return result; - } - - - public static SparqlQueryConnection unwrapQueryConnection(SparqlQueryConnection conn) { - SparqlQueryConnection result; - if(conn instanceof RDFConnectionModular) { - SparqlQueryConnection tmp = getQueryConnection((RDFConnectionModular)conn); - result = unwrapQueryConnection(tmp); - } else { - result = conn; - } - - return result; - } - - public static SparqlUpdateConnection unwrapUpdateConnection(SparqlUpdateConnection conn) { - SparqlUpdateConnection result; - if(conn instanceof RDFConnectionModular) { - SparqlUpdateConnection tmp = getUpdateConnection((RDFConnectionModular)conn); - result = unwrapUpdateConnection(tmp); - } else { - result = conn; - } - - return result; - } - - public static RDFConnection wrapWithContext(RDFConnection rawConn) { - return wrapWithContext(rawConn, cxt -> {}); - } - - - /** - * Places the connection object as a symbol into to context, - * so that custom functions - notably E_Benchmark can - * pose further queries to it. - * - * FIXME Connections are usually not intended for concurrent use; - * we should put a connection supplier into the context instead! - * - * @param rawConn - * @return - */ - public static RDFConnection wrapWithContext(RDFConnection rawConn, Consumer contextHandler) { - RDFConnection[] result = {null}; - - SparqlUpdateConnection tmp = unwrapUpdateConnection(rawConn); - Dataset dataset = tmp instanceof RDFConnectionLocal - ? 
getDataset((RDFConnectionLocal)tmp) - : null; - - result[0] = - new RDFConnectionModular(rawConn, rawConn, rawConn) { - public QueryExecution query(Query query) { - return postProcess(rawConn.query(query)); - } - - @Override - public QueryExecution query(String queryString) { - return postProcess(rawConn.query(queryString)); - } - - - @Override - public void update(UpdateRequest update) { -// checkOpen(); -// Txn.executeWrite(dataset, () -> { - UpdateProcessor tmp = UpdateExecutionFactory.create(update, dataset); - UpdateProcessor up = postProcess(tmp); - up.execute(); -// }); - } - - - public UpdateProcessor postProcess(UpdateProcessor qe) { - Context cxt = qe.getContext(); - if(cxt != null) { - cxt.set(Symbols.symConnection, result[0]); - contextHandler.accept(cxt); - } - - return qe; - } - - public QueryExecution postProcess(QueryExecution qe) { - Context cxt = qe.getContext(); - if(cxt != null) { - cxt.set(Symbols.symConnection, result[0]); - contextHandler.accept(cxt); - } - - return qe; - } - }; - - return result[0]; - } - -} diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/ConnectionLostException.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/ConnectionLostException.java new file mode 100644 index 000000000..69f99b22f --- /dev/null +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/ConnectionLostException.java @@ -0,0 +1,21 @@ +package org.aksw.jena_sparql_api.core.connection; + +import org.apache.jena.query.QueryException; + +public class ConnectionLostException + extends QueryException +{ + private static final long serialVersionUID = 1L; + + public ConnectionLostException(String msg) { + super(msg); + } + + public ConnectionLostException(Throwable cause) { + super(cause); + } + + public ConnectionLostException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/ConnectionReestablishedException.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/ConnectionReestablishedException.java new file mode 100644 index 000000000..8ac460fff --- /dev/null +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/ConnectionReestablishedException.java @@ -0,0 +1,13 @@ +package org.aksw.jena_sparql_api.core.connection; + +import org.apache.jena.query.QueryException; + +public class ConnectionReestablishedException + extends QueryException +{ + private static final long serialVersionUID = 1L; + + public ConnectionReestablishedException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionBuilder.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionBuilder.java index c871cda99..1d03d7393 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionBuilder.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionBuilder.java @@ -2,7 +2,6 @@ import java.util.function.Function; -import org.aksw.jena_sparql_api.core.RDFConnectionFactoryEx; import org.apache.jena.query.Dataset; import org.apache.jena.query.DatasetFactory; import org.apache.jena.query.Query; @@ -12,58 +11,62 @@ import org.apache.jena.rdfconnection.RDFConnectionFactory; public class RDFConnectionBuilder { - protected P parent; - protected 
T connection; - - public static RDFConnectionBuilder start() { - RDFConnectionBuilder result = new RDFConnectionBuilder(null); - return result; - } + protected P parent; + protected T connection; - public static RDFConnectionBuilder from(T conn) { - RDFConnectionBuilder result = new RDFConnectionBuilder(null); - result.setSource(conn); - return result; - } - - public RDFConnectionBuilder(P parent) { - super(); - this.parent = parent; - } + public static RDFConnectionBuilder start() { + RDFConnectionBuilder result = new RDFConnectionBuilder(null); + return result; + } - public RDFConnectionBuilder defaultModel() { - setSource(ModelFactory.createDefaultModel()); - return this; - } - - public RDFConnectionBuilder setSource(Model model) { - setSource(DatasetFactory.wrap(model)); - return this; - } + public static RDFConnectionBuilder from(T conn) { + RDFConnectionBuilder result = new RDFConnectionBuilder(null); + result.setSource(conn); + return result; + } - @SuppressWarnings("unchecked") - public RDFConnectionBuilder setSource(Dataset dataset) { - connection = (T)RDFConnectionFactory.connect(dataset); - - return this; - } - - public RDFConnectionBuilder setSource(T connection) { - this.connection = connection; - return this; - } + public RDFConnectionBuilder(P parent) { + super(); + this.parent = parent; + } - public RDFConnectionBuilder addQueryTransform(Function queryTransform) { - RDFConnection r = RDFConnectionFactoryEx.wrapWithQueryTransform((RDFConnection)this.connection, queryTransform); - - return new RDFConnectionBuilder(null).setSource(r); - } + public RDFConnectionBuilder defaultModel() { + setSource(ModelFactory.createDefaultModel()); + return this; + } - public T getConnection() { - return connection; - } - - public P end() { - return parent; - } + public RDFConnectionBuilder defaultDataset() { + setSource(DatasetFactory.create()); + return this; + } + + public RDFConnectionBuilder setSource(Model model) { + setSource(DatasetFactory.wrap(model)); + return this; + } + + @SuppressWarnings("unchecked") + public RDFConnectionBuilder setSource(Dataset dataset) { + connection = (T)RDFConnectionFactory.connect(dataset); + return this; + } + + public RDFConnectionBuilder setSource(T connection) { + this.connection = connection; + return this; + } + + public RDFConnectionBuilder addQueryTransform(Function queryTransform) { + RDFConnection r = RDFConnectionFactoryEx.wrapWithQueryTransform((RDFConnection)this.connection, queryTransform); + + return new RDFConnectionBuilder(null).setSource(r); + } + + public T getConnection() { + return connection; + } + + public P end() { + return parent; + } } \ No newline at end of file diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionEx.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionEx.java similarity index 75% rename from jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionEx.java rename to jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionEx.java index 7f5496d0b..ac2602f06 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionEx.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionEx.java @@ -1,4 +1,4 @@ -package org.aksw.jena_sparql_api.core; +package org.aksw.jena_sparql_api.core.connection; import org.apache.jena.rdfconnection.RDFConnection; diff --git 
a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionExImpl.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionExImpl.java similarity index 91% rename from jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionExImpl.java rename to jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionExImpl.java index 62f02081f..6ede9d303 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionExImpl.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionExImpl.java @@ -1,4 +1,4 @@ -package org.aksw.jena_sparql_api.core; +package org.aksw.jena_sparql_api.core.connection; import org.apache.jena.rdfconnection.RDFConnection; import org.apache.jena.rdfconnection.RDFConnectionWrapper; diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionFactoryEx.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionFactoryEx.java new file mode 100644 index 000000000..6bfe26880 --- /dev/null +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionFactoryEx.java @@ -0,0 +1,289 @@ +package org.aksw.jena_sparql_api.core.connection; + +import java.lang.reflect.Field; +import java.util.function.Consumer; +import java.util.function.Function; + +import org.aksw.jena_sparql_api.core.FluentQueryExecutionFactory; +import org.aksw.jena_sparql_api.core.SparqlServiceReference; +import org.aksw.jena_sparql_api.stmt.SparqlStmt; +import org.aksw.jena_sparql_api.utils.Symbols; +import org.apache.jena.query.Dataset; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryExecution; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdfconnection.RDFConnection; +import org.apache.jena.rdfconnection.RDFConnectionFactory; +import org.apache.jena.rdfconnection.RDFConnectionLocal; +import org.apache.jena.rdfconnection.RDFConnectionModular; +import org.apache.jena.rdfconnection.SparqlQueryConnection; +import org.apache.jena.rdfconnection.SparqlUpdateConnection; +import org.apache.jena.riot.WebContent; +import org.apache.jena.sparql.core.DatasetDescription; +import org.apache.jena.sparql.engine.http.QueryEngineHTTP; +import org.apache.jena.sparql.util.Context; +import org.apache.jena.update.UpdateExecutionFactory; +import org.apache.jena.update.UpdateProcessor; +import org.apache.jena.update.UpdateRequest; + +public class RDFConnectionFactoryEx { + + // TODO Consider move to a better place - e.g. 
RDFConnectionFactoryEx + public static RDFConnectionEx connect(String serviceUrl, DatasetDescription datasetDescription) { + SparqlServiceReference ssr = new SparqlServiceReference(serviceUrl, datasetDescription); + RDFConnectionEx result = connect(ssr); + return result; + } + + public static RDFConnectionEx connect(SparqlServiceReference ssr) { + String serviceUrl = ssr.getServiceURL(); + RDFConnection rawConn = RDFConnectionFactory.connect(serviceUrl); + DatasetDescription datasetDescription = ssr.getDatasetDescription(); + RDFConnection core = wrapWithDatasetAndXmlContentType(rawConn, datasetDescription); + + RDFConnectionMetaData metadata = ModelFactory.createDefaultModel() + .createResource().as(RDFConnectionMetaData.class); + + metadata.setServiceURL(ssr.getServiceURL()); + metadata.getDefaultGraphs().addAll(ssr.getDefaultGraphURIs()); + metadata.getNamedGraphs().addAll(ssr.getNamedGraphURIs()); + + RDFConnectionEx result = new RDFConnectionExImpl(core, metadata); + + return result; + } + + /** + * Wrap a connection with one that provides metadata. + * If the given metadata is null, an empty blank node will be created. + * + * @param rawConn + * @param metadata + * @return + */ + public static RDFConnectionEx wrap(RDFConnection rawConn, Resource metadata) { + if(metadata == null) { + metadata = ModelFactory.createDefaultModel().createResource(); + } + + RDFConnectionMetaData md = metadata.as(RDFConnectionMetaData.class); + + RDFConnectionEx result = new RDFConnectionExImpl(rawConn, md); + return result; + } + + + public static RDFConnection wrapWithQueryTransform(RDFConnection conn, Function fn) { + RDFConnection result = + new RDFConnectionModular(new SparqlQueryConnectionJsa( + FluentQueryExecutionFactory + .from(new QueryExecutionFactorySparqlQueryConnection(conn)) + .config() + .withQueryTransform(fn) + .end() + .create() + ), conn, conn); + + return result; + } + + + public static RDFConnection wrapWithDatasetAndXmlContentType(RDFConnection rawConn, DatasetDescription datasetDescription) { + RDFConnection result = + new RDFConnectionModular(new SparqlQueryConnectionJsa( + FluentQueryExecutionFactory + .from(new QueryExecutionFactorySparqlQueryConnection(rawConn)) + .config() + //.withClientSideConstruct() + .withDatasetDescription(datasetDescription) + .withPostProcessor(qe -> { + if(qe instanceof QueryEngineHTTP) { + QueryEngineHTTP qeh = (QueryEngineHTTP)qe; + qeh.setSelectContentType(WebContent.contentTypeResultsXML); + qeh.setModelContentType(WebContent.contentTypeNTriples); + qeh.setDatasetContentType(WebContent.contentTypeNQuads); + } + }) + .end() + .create() + ), rawConn, rawConn); + + + return result; + } + +// public static final Symbol symConnection = Symbol.create("http://jsa.aksw.org/connection"); + + + public static RDFConnection wrapWithQueryParser(RDFConnection rawConn, Function parser) { + RDFConnection result = + new RDFConnectionModular(rawConn, rawConn, rawConn) { + @Override + public QueryExecution query(String queryString) { + SparqlStmt stmt = parser.apply(queryString); + Query query = stmt.getAsQueryStmt().getQuery(); + QueryExecution result = query(query); + return result; + } + + + @Override + public void update(String updateString) { + SparqlStmt stmt = parser.apply(updateString); + UpdateRequest updateRequest = stmt.getAsUpdateStmt().getUpdateRequest(); + update(updateRequest); + //QueryExecution result = query(query); + } + }; + +// new SparqlQueryConnectionJsa( +// FluentQueryExecutionFactory +// .from(new 
QueryExecutionFactorySparqlQueryConnection(rawConn)) +// .config() +// .withParser(parser) +// .end() +// .create() +// ), rawConn, rawConn); + + + return result; + } + + + public static SparqlQueryConnection getQueryConnection(RDFConnectionModular conn) { + SparqlQueryConnection result; + try { + Field f = RDFConnectionModular.class.getDeclaredField("queryConnection"); + f.setAccessible(true); + result = (SparqlQueryConnection)f.get(conn); + } catch(Exception e) { + throw new RuntimeException(e); + } + + return result; + } + + public static SparqlUpdateConnection getUpdateConnection(RDFConnectionModular conn) { + SparqlUpdateConnection result; + try { + Field f = RDFConnectionModular.class.getDeclaredField("updateConnection"); + f.setAccessible(true); + result = (SparqlUpdateConnection)f.get(conn); + } catch(Exception e) { + throw new RuntimeException(e); + } + + return result; + } + + public static Dataset getDataset(RDFConnectionLocal conn) { + Dataset result; + try { + Field f = RDFConnectionLocal.class.getDeclaredField("dataset"); + f.setAccessible(true); + result = (Dataset)f.get(conn); + } catch(Exception e) { + throw new RuntimeException(e); + } + return result; + } + + + public static SparqlQueryConnection unwrapQueryConnection(SparqlQueryConnection conn) { + SparqlQueryConnection result; + if(conn instanceof RDFConnectionModular) { + SparqlQueryConnection tmp = getQueryConnection((RDFConnectionModular)conn); + result = unwrapQueryConnection(tmp); + } else { + result = conn; + } + + return result; + } + + public static SparqlUpdateConnection unwrapUpdateConnection(SparqlUpdateConnection conn) { + SparqlUpdateConnection result; + if(conn instanceof RDFConnectionModular) { + SparqlUpdateConnection tmp = getUpdateConnection((RDFConnectionModular)conn); + result = unwrapUpdateConnection(tmp); + } else { + result = conn; + } + + return result; + } + + public static RDFConnection wrapWithContext(RDFConnection rawConn) { + return wrapWithContext(rawConn, cxt -> {}); + } + + + /** + * Places the connection object as a symbol into to context, + * so that custom functions - notably E_Benchmark can + * pose further queries to it. + * + * FIXME Connections are usually not intended for concurrent use; + * we should put a connection supplier into the context instead! + * + * @param rawConn + * @return + */ + public static RDFConnection wrapWithContext(RDFConnection rawConn, Consumer contextHandler) { + RDFConnection[] result = {null}; + + SparqlUpdateConnection tmp = unwrapUpdateConnection(rawConn); + Dataset dataset = tmp instanceof RDFConnectionLocal + ? 
getDataset((RDFConnectionLocal)tmp) + : null; + + result[0] = + new RDFConnectionModular(rawConn, rawConn, rawConn) { + public QueryExecution query(Query query) { + return postProcess(rawConn.query(query)); + } + + @Override + public QueryExecution query(String queryString) { + return postProcess(rawConn.query(queryString)); + } + + + @Override + public void update(UpdateRequest update) { +// checkOpen(); +// Txn.executeWrite(dataset, () -> { + UpdateProcessor tmp = UpdateExecutionFactory.create(update, dataset); + UpdateProcessor up = postProcess(tmp); + up.execute(); +// }); + } + + + public UpdateProcessor postProcess(UpdateProcessor qe) { + Context cxt = qe.getContext(); + if(cxt != null) { + cxt.set(Symbols.symConnection, result[0]); + contextHandler.accept(cxt); + } + + return qe; + } + + public QueryExecution postProcess(QueryExecution qe) { + Context cxt = qe.getContext(); + if(cxt != null) { + cxt.set(Symbols.symConnection, result[0]); + contextHandler.accept(cxt); + } + + return qe; + } + }; + + return result[0]; + } + + +} diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionMetaData.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionMetaData.java similarity index 95% rename from jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionMetaData.java rename to jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionMetaData.java index ce487f439..817dcc1d7 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/RDFConnectionMetaData.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionMetaData.java @@ -1,4 +1,4 @@ -package org.aksw.jena_sparql_api.core; +package org.aksw.jena_sparql_api.core.connection; import java.util.List; import java.util.Set; diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionTransform.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionTransform.java new file mode 100644 index 000000000..a2c5bf506 --- /dev/null +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/RDFConnectionTransform.java @@ -0,0 +1,10 @@ +package org.aksw.jena_sparql_api.core.connection; + +import java.util.function.Function; + +import org.apache.jena.rdfconnection.RDFConnection; + +public interface RDFConnectionTransform + extends Function +{ +} diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionJsa.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionJsa.java index 3ef0e0abb..68e810f06 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionJsa.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionJsa.java @@ -6,18 +6,25 @@ import org.apache.jena.sparql.core.Transactional; public class SparqlQueryConnectionJsa - extends TransactionalDelegate - implements SparqlQueryConnectionTmp + extends TransactionalDelegate + implements SparqlQueryConnectionTmp { - protected QueryExecutionFactory queryExecutionFactory; + protected QueryExecutionFactory queryExecutionFactory; + protected Transactional transactional; public SparqlQueryConnectionJsa(QueryExecutionFactory queryExecutionFactory) { - this(queryExecutionFactory, new TransactionalTmp() {}); + 
this(queryExecutionFactory, new TransactionalTmp() {}); } public SparqlQueryConnectionJsa(QueryExecutionFactory queryExecutionFactory, Transactional transactional) { - super(transactional); + super(); this.queryExecutionFactory = queryExecutionFactory; + this.transactional = transactional; + } + + @Override + protected Transactional getDelegate() { + return transactional; } @Override @@ -40,4 +47,5 @@ public void close() { throw new RuntimeException(e); } } + } diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionTmp.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionTmp.java index 3bc4fb757..7b1712069 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionTmp.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionTmp.java @@ -45,8 +45,8 @@ public default void queryResultSet(Query query, Consumer resultSetAct if ( ! query.isSelectType() ) throw new JenaConnectionException("Query is not a SELECT query"); - Txn.executeRead(this, ()->{ - try ( QueryExecution qExec = query(query) ) { + Txn.executeRead(this, ()-> { + try (QueryExecution qExec = query(query) ) { ResultSet rs = qExec.execSelect(); resultSetAction.accept(rs); } @@ -155,8 +155,8 @@ public default boolean queryAsk(Query query) { public default QueryExecution query(String queryString) { return query(QueryFactory.create(queryString)); } - -// + +// // @Override // public default void querySelect(String query, Consumer rowAction) { // this.queryResultSet(query, rs -> { diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionWithExecFails.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionWithExecFails.java new file mode 100644 index 000000000..11d80c4de --- /dev/null +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionWithExecFails.java @@ -0,0 +1,69 @@ +package org.aksw.jena_sparql_api.core.connection; + +import java.util.Objects; +import java.util.function.Function; + +import org.aksw.jena_sparql_api.core.QueryExecutionDecorator; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryExecution; +import org.apache.jena.rdfconnection.SparqlQueryConnection; + +/** + * A query connection wrapper which can raise intentional exceptions on query execution. + * Useful for debugging behavior of application code under failure. 
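+ *
+ * <p>A minimal usage sketch, assuming an existing delegate connection
+ * {@code conn}; the failure rule below is illustrative:
+ * <pre>{@code
+ * SparqlQueryConnection failing = new SparqlQueryConnectionWithExecFails(conn,
+ *     q -> q.isSelectType() ? new ConnectionLostException("simulated loss") : null);
+ * try (QueryExecution qe = failing.query("SELECT * { ?s ?p ?o }")) {
+ *     qe.execSelect(); // fails in beforeExec with a RuntimeException wrapping the cause
+ * }
+ * }</pre>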
+ * + * @author raven + * + */ +public class SparqlQueryConnectionWithExecFails + extends TransactionalDelegate + implements SparqlQueryConnectionTmp +{ + protected SparqlQueryConnection delegate; + protected Function<Query, Throwable> queryToThrowable; + + public SparqlQueryConnectionWithExecFails(SparqlQueryConnection delegate, + Function<Query, Throwable> queryToThrowable) { + super(); + this.delegate = delegate; + this.queryToThrowable = queryToThrowable; + } + + @Override + public SparqlQueryConnection getDelegate() { + return delegate; + } + + @Override + public QueryExecution query(Query query) { + QueryExecution core = getDelegate().query(query); + return new QueryExecutionWithExecFails(core); + } + + @Override + public void close() { + } + + + public class QueryExecutionWithExecFails + extends QueryExecutionDecorator + { + public QueryExecutionWithExecFails(QueryExecution delegate) { + super(delegate); + Objects.requireNonNull(delegate.getQuery(), "The delegate query execution must expose the query"); + } + + @Override + protected void beforeExec() { + super.beforeExec(); + + Query query = getQuery(); + Throwable throwable = queryToThrowable.apply(query); + + if (throwable != null) { + throw new RuntimeException(throwable); + } + } + } + +} diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionWithReconnect.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionWithReconnect.java new file mode 100644 index 000000000..15a2371bf --- /dev/null +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlQueryConnectionWithReconnect.java @@ -0,0 +1,236 @@ +package org.aksw.jena_sparql_api.core.connection; + +import java.util.concurrent.Callable; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +import org.aksw.commons.util.exception.ExceptionUtilsAksw; +import org.aksw.commons.util.healthcheck.HealthcheckRunner; +import org.aksw.jena_sparql_api.core.QueryExecutionDecorator; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryExecution; +import org.apache.jena.query.QueryFactory; +import org.apache.jena.query.ResultSet; +import org.apache.jena.query.ResultSetFormatter; +import org.apache.jena.rdfconnection.SparqlQueryConnection; +import org.apache.jena.sparql.core.Transactional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A connection wrapper that tries to recover from loss of the underlying connection. + * Neither replays transactions nor individual queries. + * + * If a query fails due to a connection loss then attempts are made to establish a new connection. + * If an attempt is successful then a ConnectionReestablishedException is raised which indicates that the + * query failed but the connection would be ready to accept workloads again. + * + * The main benefit of this class is that it removes the need of passing a {@link java.sql.DataSource}-like object to + * client methods that operate on a single connection. It also takes care of resource management such + * as closing the connections used for probing. + * The client methods can catch the + * {@link ConnectionReestablishedException} and {@link ConnectionLostException} and act accordingly + * such as by replaying some queries or moving on to the next workload. + * + * This class is thread safe: In case of a connection problem, the health check runner is started by one + * of the threads that requested a query execution. All other threads are blocked while the health check is running + * and fire the appropriate exception once it finishes. + * + * + * @author raven + * + */ +public class SparqlQueryConnectionWithReconnect + extends TransactionalDelegate + implements SparqlQueryConnectionTmp +{ + private static final Logger logger = LoggerFactory.getLogger(SparqlQueryConnectionWithReconnect.class); + + protected Callable<SparqlQueryConnection> dataConnectionSupplier; + protected Callable<SparqlQueryConnection> probeConnectionSupplier; + + protected Query healthCheckQuery = QueryFactory.create( + "SELECT * { ?s }"); + + protected Supplier healthCheckBuilder; + + /** True indicates that a recovery process was started which eventually failed */ + // protected boolean isLost = false; + protected transient Exception connectionLostCause = null; + + /** + * Number of times the healthcheck runner was invoked in an attempt to reconnect. + * Not to be confused with the number of reconnect attempts made by a single healthcheck run. + * This value is used as 'timestamp' when multiple requests are waiting for the connection + * to become available again + */ + protected transient int reconnectAttemptCount = 0; + + public boolean isConnectionLost() { + return connectionLostCause != null; + } + + public SparqlQueryConnectionWithReconnect( + Callable<SparqlQueryConnection> dataConnectionSupplier, + Callable<SparqlQueryConnection> probeConnectionSupplier, + Supplier healthCheckBuilder, + SparqlQueryConnection activeDelegate) { + super(); + this.dataConnectionSupplier = dataConnectionSupplier; + this.probeConnectionSupplier = probeConnectionSupplier; + this.activeDelegate = activeDelegate; + this.healthCheckBuilder = healthCheckBuilder; + } + + + public int getReconnectAttemptCount() { + return reconnectAttemptCount; + } + + /** Immediately obtain a connection from the supplier */ + public static SparqlQueryConnectionWithReconnect create( + Callable<SparqlQueryConnection> connectionSupplier + ) throws Exception { + SparqlQueryConnection conn = connectionSupplier.call(); + + return new SparqlQueryConnectionWithReconnect( + connectionSupplier, + connectionSupplier, + () -> HealthcheckRunner + .builder().setRetryCount(Long.MAX_VALUE).setInterval(5, TimeUnit.SECONDS), + conn); + } + + /** The currently active connection */ + protected SparqlQueryConnection activeDelegate; + + @Override + protected Transactional getDelegate() { + return activeDelegate; + } + + protected void checkForConnectionLoss() { + if (connectionLostCause != null) { + throw new ConnectionLostException("connection lost", connectionLostCause); + } + } + + @Override + public QueryExecution query(Query query) { + checkForConnectionLoss(); + + QueryExecution core = activeDelegate.query(query); + QueryExecution wrapped = new QueryExecutionWithReconnect(core); + return wrapped; + } + + + protected boolean isConnectionProblemException(Throwable t) { + return ExceptionUtilsAksw.isConnectionRefusedException(t) + || ExceptionUtilsAksw.isUnknownHostException(t); + } + + + protected void forceCloseActiveConn() { + try { + if (activeDelegate != null) { + activeDelegate.close(); + } + } catch (Exception e) { + logger.warn("Exception while attempting to close an apparently lost connection", e); + } + activeDelegate = null; + } + + /** + * This method is run by the healthcheck runner until it no longer throws an exception + * + * @throws Exception + */ + protected void tryRecovery() throws Exception { + forceCloseActiveConn(); + + boolean reuseProbeConn = probeConnectionSupplier == dataConnectionSupplier; + + SparqlQueryConnection probeConn = null; + try { + probeConn =
probeConnectionSupplier.call(); + try (QueryExecution qe = probeConn.query(healthCheckQuery)) { + ResultSet rs = qe.execSelect(); + ResultSetFormatter.consume(rs); + } + } catch (Exception e) { + if (probeConn != null) { + probeConn.close(); + } + throw new RuntimeException(e); + } + + activeDelegate = reuseProbeConn + ? probeConn + : dataConnectionSupplier.call(); + } + + @Override + public void close() { + activeDelegate.close(); + } + + + protected void testForConnectionProblem(Exception e, int timestamp) { + if (isConnectionProblemException(e)) { + handleConnectionProblem(e, timestamp); + } + else { + // Assume a 'normal' query exception, i.e. there is no problem + // with the connection itself + throw new RuntimeException(e); + } + } + + protected synchronized void handleConnectionProblem(Exception e, int timestamp) { + if (connectionLostCause == null && reconnectAttemptCount == timestamp) { + try { + healthCheckBuilder.get() + .setAction(() -> tryRecovery()) + .addFatalCondition(ex -> !isConnectionProblemException(ex)) + .build() + .run(); + } catch (Exception mostRecentHealthCheckException) { + connectionLostCause = mostRecentHealthCheckException; + } + ++reconnectAttemptCount; + } + + + if (connectionLostCause != null) { + throw new ConnectionLostException("connection lost", connectionLostCause); + } else { + throw new ConnectionReestablishedException("connection re-established", e); + } + } + + + public class QueryExecutionWithReconnect + extends QueryExecutionDecorator + { + public QueryExecutionWithReconnect(QueryExecution decoratee) { + super(decoratee); + } + + @Override + protected void beforeExec() { + super.beforeExec(); + + checkForConnectionLoss(); + } + + @Override + protected void onException(Exception e) { + int timestamp = getReconnectAttemptCount(); + + testForConnectionProblem(e, timestamp); + } + } + +} diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlUpdateConnectionMultiplex.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlUpdateConnectionMultiplex.java index 0c7afc138..e358d24e1 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlUpdateConnectionMultiplex.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/SparqlUpdateConnectionMultiplex.java @@ -8,38 +8,37 @@ import org.apache.jena.update.UpdateRequest; public class SparqlUpdateConnectionMultiplex - extends TransactionalMultiplex - implements SparqlUpdateConnection + extends TransactionalMultiplex + implements SparqlUpdateConnection { - public SparqlUpdateConnectionMultiplex(SparqlUpdateConnection ... delegates) { - this(Arrays.asList(delegates)); - } - - public SparqlUpdateConnectionMultiplex(Collection delegates) { - super(delegates); - // TODO Auto-generated constructor stub - } - - @Override - public void update(Update update) { - TransactionalMultiplex.forEach(delegates, d -> d.update(update)); - } - - @Override - public void update(UpdateRequest update) { - TransactionalMultiplex.forEach(delegates, d -> d.update(update)); - } - - @Override - public void update(String updateString) { - TransactionalMultiplex.forEach(delegates, d -> d.update(updateString)); - } - - @Override - public void close() { - TransactionalMultiplex.forEach(delegates, d -> d.close()); - } + public SparqlUpdateConnectionMultiplex(SparqlUpdateConnection ... 
delegates) { + this(Arrays.asList(delegates)); + } + + public SparqlUpdateConnectionMultiplex(Collection delegates) { + super(delegates); + } + + @Override + public void update(Update update) { + TransactionalMultiplex.forEach(delegates, d -> d.update(update)); + } + + @Override + public void update(UpdateRequest update) { + TransactionalMultiplex.forEach(delegates, d -> d.update(update)); + } + + @Override + public void update(String updateString) { + TransactionalMultiplex.forEach(delegates, d -> d.update(updateString)); + } + + @Override + public void close() { + TransactionalMultiplex.forEach(delegates, d -> d.close()); + } } diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/TransactionalDelegate.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/TransactionalDelegate.java index 146562e52..d4b253fb7 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/TransactionalDelegate.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/connection/TransactionalDelegate.java @@ -4,58 +4,53 @@ import org.apache.jena.query.TxnType; import org.apache.jena.sparql.core.Transactional; -public class TransactionalDelegate - implements Transactional +public abstract class TransactionalDelegate + implements Transactional { - protected Transactional delegate; - - public TransactionalDelegate(Transactional delegate) { - super(); - this.delegate = delegate; - } - - @Override - public void begin(ReadWrite readWrite) { - delegate.begin(readWrite); - } - - @Override - public void commit() { - delegate.commit(); - } - - @Override - public void abort() { - delegate.abort(); - } - - @Override - public void end() { - delegate.end(); - } - - @Override - public boolean isInTransaction() { - return delegate.isInTransaction(); - } - - @Override - public void begin(TxnType type) { - delegate.begin(type); - } - - @Override - public boolean promote(Promote mode) { - return delegate.promote(mode); - } - - @Override - public ReadWrite transactionMode() { - return delegate.transactionMode(); - } - - @Override - public TxnType transactionType() { - return delegate.transactionType(); - } + protected abstract Transactional getDelegate(); + + @Override + public void begin(ReadWrite readWrite) { + getDelegate().begin(readWrite); + } + + @Override + public void commit() { + getDelegate().commit(); + } + + @Override + public void abort() { + getDelegate().abort(); + } + + @Override + public void end() { + getDelegate().end(); + } + + @Override + public boolean isInTransaction() { + return getDelegate().isInTransaction(); + } + + @Override + public void begin(TxnType type) { + getDelegate().begin(type); + } + + @Override + public boolean promote(Promote mode) { + return getDelegate().promote(mode); + } + + @Override + public ReadWrite transactionMode() { + return getDelegate().transactionMode(); + } + + @Override + public TxnType transactionType() { + return getDelegate().transactionType(); + } } diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/plugin/JenaPluginJsaCore.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/plugin/JenaPluginJsaCore.java index 9ea87568e..201a58f43 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/plugin/JenaPluginJsaCore.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/plugin/JenaPluginJsaCore.java @@ -1,26 +1,27 @@ package org.aksw.jena_sparql_api.core.plugin; -import 
org.aksw.jena_sparql_api.core.RDFConnectionMetaData; +import org.aksw.jena_sparql_api.core.connection.RDFConnectionMetaData; import org.aksw.jena_sparql_api.mapper.proxy.JenaPluginUtils; import org.apache.jena.enhanced.Personality; import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.sys.JenaSubsystemLifecycle; +import org.apache.jena.sys.JenaSystem; public class JenaPluginJsaCore - implements JenaSubsystemLifecycle + implements JenaSubsystemLifecycle { - public void start() { - init(); - } + public void start() { + init(); + } - @Override - public void stop() { - } + @Override + public void stop() { + } - public static void init() { - JenaPluginUtils.registerResourceClasses(RDFConnectionMetaData.class); - } - - public static void init(Personality p) { - } + public static void init() { + JenaPluginUtils.registerResourceClasses(RDFConnectionMetaData.class); + } + + public static void init(Personality p) { + } } diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/utils/QueryExecutionUtils.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/utils/QueryExecutionUtils.java index 9bfcfe808..782c81e10 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/utils/QueryExecutionUtils.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/core/utils/QueryExecutionUtils.java @@ -4,20 +4,17 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -import java.util.function.Function; +import java.util.function.Supplier; import org.aksw.jena_sparql_api.core.QueryExecutionFactory; import org.aksw.jena_sparql_api.core.ResultSetCloseable; -import org.aksw.jena_sparql_api.lookup.GuavaFunctionWrapper; import org.aksw.jena_sparql_api.mapper.BindingMapper; -import org.aksw.jena_sparql_api.mapper.BindingMapperProjectVar; import org.aksw.jena_sparql_api.mapper.BindingMapperQuad; import org.aksw.jena_sparql_api.mapper.BindingMapperUtils; -import org.aksw.jena_sparql_api.mapper.FunctionBindingMapper; import org.aksw.jena_sparql_api.syntax.QueryGenerationUtils; import org.aksw.jena_sparql_api.utils.CloseableQueryExecution; import org.aksw.jena_sparql_api.utils.ExtendedIteratorClosable; -import org.aksw.jena_sparql_api.utils.IteratorResultSetBinding; +import org.aksw.jena_sparql_api.utils.ResultSetUtils; import org.aksw.jena_sparql_api.utils.Vars; import org.apache.jena.atlas.lib.Closeable; import org.apache.jena.atlas.lib.Sink; @@ -30,6 +27,7 @@ import org.apache.jena.query.ResultSetFormatter; import org.apache.jena.query.Syntax; import org.apache.jena.shared.impl.PrefixMappingImpl; +import org.apache.jena.sparql.algebra.Table; import org.apache.jena.sparql.core.Quad; import org.apache.jena.sparql.core.Var; import org.apache.jena.sparql.engine.binding.Binding; @@ -40,8 +38,6 @@ import org.apache.jena.sparql.syntax.Element; import org.apache.jena.sparql.syntax.ElementSubQuery; import org.apache.jena.util.iterator.ExtendedIterator; -import org.apache.jena.util.iterator.NiceIterator; -import org.apache.jena.util.iterator.WrappedIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,6 +53,16 @@ public class QueryExecutionUtils { public static final Var vo = Var.alloc("o"); + public static Table execSelectTable(Supplier qeSupp) { + Table result; + try (QueryExecution qe = qeSupp.get()) { + ResultSet rs = qe.execSelect(); + result = ResultSetUtils.resultSetToTable(rs); + } + return result; + } + + public static void abortAfterFirstRow(QueryExecution qe) { Query query = qe.getQuery(); assert 
query != null : "QueryExecution did not tell us which query it is bound to - query was null"; diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/ListServiceEntityQuery.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/ListServiceEntityQuery.java new file mode 100644 index 000000000..28ff6baa0 --- /dev/null +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/ListServiceEntityQuery.java @@ -0,0 +1,209 @@ +package org.aksw.jena_sparql_api.lookup; + +import java.util.AbstractMap.SimpleEntry; +import java.util.Map.Entry; +import java.util.function.Function; + +import org.aksw.jena_sparql_api.concepts.Concept; +import org.aksw.jena_sparql_api.concepts.ConceptUtils; +import org.aksw.jena_sparql_api.concepts.UnaryRelation; +import org.aksw.jena_sparql_api.rx.EntityBaseQuery; +import org.aksw.jena_sparql_api.rx.SparqlRx; +import org.aksw.jena_sparql_api.rx.entity.engine.EntityQueryRx; +import org.aksw.jena_sparql_api.rx.entity.engine.EntityQueryRx.EntityQueryProcessed; +import org.aksw.jena_sparql_api.rx.entity.model.AttributeGraphFragment; +import org.aksw.jena_sparql_api.rx.entity.model.EntityQueryBasic; +import org.aksw.jena_sparql_api.rx.entity.model.EntityQueryImpl; +import org.aksw.jena_sparql_api.utils.QueryUtils; +import org.apache.jena.graph.Node; +import org.apache.jena.query.Query; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdfconnection.SparqlQueryConnection; + +import com.google.common.collect.Range; + +import io.reactivex.rxjava3.core.Flowable; +import io.reactivex.rxjava3.core.Single; + +public class ListServiceEntityQuery + implements ListService +{ + protected SparqlQueryConnection conn; + protected AttributeGraphFragment attributePart; + + public ListServiceEntityQuery(SparqlQueryConnection conn, AttributeGraphFragment attributePart) { + super(); + this.conn = conn; + this.attributePart = attributePart; + } + + + @Override + public ListPaginator createPaginator(EntityBaseQuery baseQuery) { + return new ListPaginatorEntityQuery(baseQuery); + } + + + public class ListPaginatorEntityQuery + implements ListPaginator + { + protected EntityBaseQuery baseQuery; + + public ListPaginatorEntityQuery(EntityBaseQuery baseQuery) { + super(); + this.baseQuery = baseQuery; + } + + @Override + public Flowable apply(Range t) { + t = t == null ? 
Range.atLeast(0l) : t; + + EntityBaseQuery clone = baseQuery.cloneQuery(); + + // TODO Ensure we create a deep clone + Query standardQuery = clone.getStandardQuery(); + + Range baseRange = QueryUtils.toRange(baseQuery.getStandardQuery()); + Range effectiveRange = QueryUtils.subRange(baseRange, t); + + QueryUtils.applyRange(standardQuery, effectiveRange); + + + EntityQueryImpl entityQuery = new EntityQueryImpl(); + entityQuery.setBaseQuery(clone); + entityQuery.setAttributePart(attributePart); + + + // QueryUtils.applySlice(query, offset, limit, cloneOnChange) + + Flowable result = EntityQueryRx.execConstructEntities(conn, entityQuery); + return result; + } + + @Override + public Single> fetchCount(Long itemLimit, Long rowLimit) { + EntityQueryImpl entityQuery = new EntityQueryImpl(); + entityQuery.getAttributePart().getMandatoryJoins().addAll(attributePart.getMandatoryJoins()); + entityQuery.setBaseQuery(baseQuery); + + EntityQueryBasic basic = EntityQueryRx.assembleEntityAndAttributeParts(entityQuery); + EntityQueryProcessed processed = EntityQueryRx.processEntityQuery(basic, true); + Query query = processed.getInnerSelect(); + + // Entry countData = QueryGenerationUtils.createQueryCount(query); + + Single> result = SparqlRx.fetchCountQueryPartition(conn, query, processed.getPartitionVars(), itemLimit, rowLimit); + return result; + } + } + + public MapService asMapService() { + MapService result = new MapServiceFromListService<>( + this, RDFNode::asNode, Function.identity()); + + return result; + } + + public LookupService asLookupService() { + MapServiceFromListService mapService = new MapServiceFromListService<>( + this, RDFNode::asNode, Function.identity()); + + + LookupService result = mapService.asLookupService(ListServiceEntityQuery::toBaseQuery); + return result; + } + + public static EntityBaseQuery fromConcept(UnaryRelation rel) { + return EntityBaseQuery.create(rel.getVar(), rel.asQuery()); + } + + public static EntityBaseQuery toBaseQuery(Iterable nodes) { + Concept concept = ConceptUtils.createConcept(nodes); + EntityBaseQuery result = fromConcept(concept); + return result; + } +} + +class MapServiceFromListService + implements MapService +{ + protected ListService listService; + protected Function itemToKey; + protected Function itemToValue; + + public MapServiceFromListService( + ListService listService, + Function itemToKey, + Function itemToValue) { + super(); + this.listService = listService; + this.itemToKey = itemToKey; + this.itemToValue = itemToValue; + } + + + public class MapPaginatorFromListService + implements MapPaginator + { + protected ListPaginator listPaginator; + + public MapPaginatorFromListService(ListPaginator listPaginator) { + super(); + this.listPaginator = listPaginator; + } + + @Override + public Flowable> apply(Range t) { + Flowable> result = listPaginator.apply(t) + .map(item -> { + K key = itemToKey.apply(item); + V value = itemToValue.apply(item); + Entry r = new SimpleEntry<>(key, value); + return r; + }); + + return result; + } + + @Override + public Single> fetchCount(Long itemLimit, Long rowLimit) { + return listPaginator.fetchCount(itemLimit, rowLimit); + } + } + + + @Override + public MapPaginator createPaginator(C concept) { + ListPaginator listPaginator = listService.createPaginator(concept); + return new MapPaginatorFromListService(listPaginator); + } + + public LookupService asLookupService(Function, C> keysToFilter) { + LookupService result = new LookupServiceFromMapService<>(this, keysToFilter); + return result; + } +} + +class 
LookupServiceFromMapService + implements LookupService +{ + protected MapService mapService; + protected Function, C> keysToFilter; + + + public LookupServiceFromMapService(MapService mapService, + Function, C> keysToFilter) { + super(); + this.mapService = mapService; + this.keysToFilter = keysToFilter; + } + + + @Override + public Flowable> apply(Iterable t) { + C filter = keysToFilter.apply(t); + return mapService.streamData(filter, Range.atLeast(0l)); + } + +} + diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/LookupService.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/LookupService.java index d655bdfa0..1bd42f548 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/LookupService.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/LookupService.java @@ -20,6 +20,10 @@ default LookupService mapValues(BiFunction fn) { return LookupServiceTransformValue.create(this, fn); } + default LookupService mapKeys(Function fn) { + return LookupServiceTransformKey.create(this, fn); + } + default LookupService cache() { return LookupServiceCacheMem.create(this); } @@ -58,4 +62,64 @@ default List fetchList(Iterable keys) { return result; } + + /** + * A convenience short-hand for fetching a map + * by first mapping the keys to proxy keys. + * + * Equivalent to + *
<pre>this.mapKeys(keyMapper).fetchMap(keys)</pre>
+ * + * @param + * @param keys + * @param keyMapper + * @return + */ + default Map fetchMap( + Iterable keys, + Function keyMapper) { + Map result = this.mapKeys(keyMapper).fetchMap(keys); + return result; + } + + +// default LookupService fetchMapWithProxyKeys(Iterable keys, +// Function keyToProxy) { +// return fetchMapWithProxyKeys(keys, keyToProxy, this); +// } +// +// default Map fetchMapWithProxyKeys(Iterable keys, +// Function keyToProxy) { +// return fetchMapWithProxyKeys(keys, keyToProxy, this); +// } +// +// /** +// * +// * +// * @param +// * @param

+// * @param +// * @param keys +// * @param keyToProxy +// * @param delegate +// * @return +// */ +// static Map fetchMapWithProxyKeys( +// Iterable keys, +// Function keyToProxy, +// LookupService delegate) { +// Multimap index = Multimaps.index(keys, keyToProxy::apply); +// +// Set

proxyKeys = index.asMap().keySet(); +// Map proxyKeyToValue = delegate.fetchMap(proxyKeys); +// +// Map result = Streams.stream(keys) +// .collect(Collectors.toMap(key -> key, key -> { +// P proxyKey = keyToProxy.apply(key); +// V r = proxyKeyToValue.get(proxyKey); +// return r; +// })); +// +// return result; +// } } diff --git a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/LookupServiceTransformKey.java b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/LookupServiceTransformKey.java index 740b65796..153535968 100644 --- a/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/LookupServiceTransformKey.java +++ b/jena-sparql-api-core/src/main/java/org/aksw/jena_sparql_api/lookup/LookupServiceTransformKey.java @@ -4,18 +4,29 @@ import java.util.LinkedHashMap; import java.util.Map; import java.util.Map.Entry; - -import com.google.common.base.Function; +import java.util.function.Function; import io.reactivex.rxjava3.core.Flowable; +/** + * Create a new LookupService which maps keys to a target domain + * before passing them to the delegate service. + * + * @author raven + * + * @param + * @param + * @param + */ public class LookupServiceTransformKey implements LookupService { private LookupService delegate; - private Function keyMapper; + private Function keyMapper; - public LookupServiceTransformKey(LookupService delegate, Function keyMapper) { + public LookupServiceTransformKey( + LookupService delegate, + Function keyMapper) { super(); this.delegate = delegate; this.keyMapper = keyMapper; @@ -61,8 +72,10 @@ public Flowable> apply(Iterable keys) { return result; } - public static LookupServiceTransformKey create(LookupService base, Function keyMapper) { - LookupServiceTransformKey result = new LookupServiceTransformKey(base, keyMapper); + public static LookupService create( + LookupService base, + Function keyMapper) { + LookupService result = new LookupServiceTransformKey<>(base, keyMapper); return result; } } diff --git a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/SparqlTest.java b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/SparqlTest.java index c14fbaa62..dba30bdc1 100644 --- a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/SparqlTest.java +++ b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/SparqlTest.java @@ -31,6 +31,7 @@ import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Resource; +import org.apache.jena.sys.JenaSystem; import org.apache.jena.vocabulary.OWL; import org.apache.jena.vocabulary.RDF; import org.junit.Test; @@ -81,6 +82,9 @@ public void run() { */ public class SparqlTest { + static { JenaSystem.init(); } + + // @BeforeClass // public static void setUp() { // PropertyConfigurator.configure("log4j.properties"); diff --git a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/core/utils/UpdateRequestUtilsTest.java b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/core/utils/UpdateRequestUtilsTest.java index 74adad04f..06030989a 100644 --- a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/core/utils/UpdateRequestUtilsTest.java +++ b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/core/utils/UpdateRequestUtilsTest.java @@ -2,12 +2,15 @@ import org.aksw.jena_sparql_api.syntax.UpdateRequestUtils; import org.apache.jena.query.Syntax; +import org.apache.jena.sys.JenaSystem; import org.apache.jena.update.UpdateFactory; import 
org.apache.jena.update.UpdateRequest; import org.junit.Test; public class UpdateRequestUtilsTest { + static { JenaSystem.init(); } + @Test public void testFixVarNames() { String str = diff --git a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/lookup/TestListServiceEntityQuery.java b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/lookup/TestListServiceEntityQuery.java new file mode 100644 index 000000000..7abb1411d --- /dev/null +++ b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/lookup/TestListServiceEntityQuery.java @@ -0,0 +1,45 @@ +package org.aksw.jena_sparql_api.lookup; + +import java.util.List; + +import org.aksw.jena_sparql_api.rx.EntityBaseQuery; +import org.aksw.jena_sparql_api.rx.entity.model.AttributeGraphFragment; +import org.aksw.jena_sparql_api.stmt.SparqlQueryParser; +import org.aksw.jena_sparql_api.stmt.SparqlQueryParserImpl; +import org.aksw.jena_sparql_api.utils.Vars; +import org.apache.jena.query.Dataset; +import org.apache.jena.query.DatasetFactory; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdfconnection.RDFConnectionFactory; +import org.apache.jena.rdfconnection.SparqlQueryConnection; +import org.apache.jena.shared.PrefixMapping; +import org.apache.jena.sys.JenaSystem; +import org.junit.Test; + +import com.google.common.collect.Range; + +public class TestListServiceEntityQuery { + + static { JenaSystem.init(); } + + @Test + public void testListServiceFromEntityQuery() { + SparqlQueryParser parser = SparqlQueryParserImpl.create(PrefixMapping.Extended); + + AttributeGraphFragment agf = new AttributeGraphFragment(); + agf.addMandatoryJoin(Vars.s, parser.apply("CONSTRUCT WHERE { ?s eg:type ?t }")); + agf.addOptionalJoin(Vars.s, parser.apply("CONSTRUCT WHERE { ?s eg:label ?l }")); + + Dataset dataset = DatasetFactory.create(); + try (SparqlQueryConnection conn = RDFConnectionFactory.connect(dataset)) { + ListService listService = new ListServiceEntityQuery(conn, agf); + + EntityBaseQuery baseQuery = EntityBaseQuery.create(Vars.x, parser.apply("SELECT * { ?x a ?y } LIMIT 10 OFFSET 5")); + ListPaginator paginator = listService.createPaginator(baseQuery); + System.out.println(paginator.fetchCount(null, null).blockingGet()); + + List nodes = paginator.apply(Range.closedOpen(0l, 10l)).toList().blockingGet(); + System.out.println(nodes); + } + } +} diff --git a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/lookup/TestReactiveSparql.java b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/lookup/TestReactiveSparql.java index 1a3869a90..3ebc61742 100644 --- a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/lookup/TestReactiveSparql.java +++ b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/lookup/TestReactiveSparql.java @@ -19,6 +19,7 @@ import org.apache.jena.rdfconnection.RDFConnectionFactory; import org.apache.jena.riot.RDFDataMgr; import org.apache.jena.sparql.algebra.Table; +import org.apache.jena.sys.JenaSystem; import org.junit.Test; import com.google.common.collect.Range; @@ -28,6 +29,7 @@ public class TestReactiveSparql { + static { JenaSystem.init(); } //@Test diff --git a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/transform/TestElementTransformVirtualPredicates.java b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/transform/TestElementTransformVirtualPredicates.java index e82e0875f..1eb207f09 100644 --- a/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/transform/TestElementTransformVirtualPredicates.java +++ 
b/jena-sparql-api-core/src/test/java/org/aksw/jena_sparql_api/transform/TestElementTransformVirtualPredicates.java @@ -27,11 +27,13 @@ import org.apache.jena.sparql.core.Prologue; import org.apache.jena.sparql.core.Var; import org.apache.jena.sparql.util.Context; +import org.apache.jena.sys.JenaSystem; import org.apache.jena.vocabulary.RDFS; import org.junit.Test; public class TestElementTransformVirtualPredicates { + static { JenaSystem.init(); } public static void main(String[] args) { TestElementTransformVirtualPredicates x = new TestElementTransformVirtualPredicates(); @@ -40,10 +42,10 @@ public static void main(String[] args) { @Test public void test() { - // Set up some default namespaces - Prologue prologue = new Prologue(); + // Set up some default namespaces + Prologue prologue = new Prologue(); prologue.setPrefixMapping(PrefixMapping.Extended); - + // Load a simple RDF model about people and their birth date // Note, that there is no explicit 'age' attribute in the data Model model = RDFDataMgr.loadModel("virtual-predicates-example.ttl"); @@ -51,37 +53,37 @@ public void test() { // Set up a map for expanding predicates with binary (sparql) relations - Map virtualPredicates = new HashMap(); + Map virtualPredicates = new HashMap(); - // Register a virtual predicate that computes the age from the current - // date and birth date of a person + // Register a virtual predicate that computes the age from the current + // date and birth date of a person virtualPredicates.put(NodeFactory.createURI("http://www.example.org/age"), - BinaryRelationImpl.create("?s a eg:Person ; eg:birthDate ?start . " + - "BIND(NOW() AS ?end) " + - "BIND(YEAR(?end) - YEAR(?start) - IF(MONTH(?end) < MONTH(?start) || (MONTH(?end) = MONTH(?start) && DAY(?end) < DAY(?start)), 1, 0) as ?age)", + BinaryRelationImpl.create("?s a eg:Person ; eg:birthDate ?start . " + + "BIND(NOW() AS ?end) " + + "BIND(YEAR(?end) - YEAR(?start) - IF(MONTH(?end) < MONTH(?start) || (MONTH(?end) = MONTH(?start) && DAY(?end) < DAY(?start)), 1, 0) as ?age)", "s", "age", prologue)); - + // Set up some queries and run them SparqlQueryParser parser = SparqlQueryParserImpl.create(Syntax.syntaxARQ, prologue); - + List queries = Arrays.asList( parser.apply("Select (year(NOW()) - year('1984-01-01'^^xsd:date) AS ?d) { }"), - parser.apply("Select * { ?s ?p ?o }"), - parser.apply("Select * { ?s eg:age ?o }"), - parser.apply("Select * { ?s a eg:Person ; eg:age ?a }"), - parser.apply("Select * { ?s a eg:Person ; ?p ?o . FILTER(?p = eg:age) }") + parser.apply("Select * { ?s ?p ?o }"), + parser.apply("Select * { ?s eg:age ?o }"), + parser.apply("Select * { ?s a eg:Person ; eg:age ?a }"), + parser.apply("Select * { ?s a eg:Person ; ?p ?o . 
FILTER(?p = eg:age) }") ); for(Query query : queries) { - if(query.isQueryResultStar()) { - query.getProjectVars().addAll(query.getResultVars().stream().map(Var::alloc).collect(Collectors.toList())); - query.setQueryResultStar(false); - System.out.println(query); - } - - Query intermediateQuery = ElementTransformVirtualPredicates.transform(query, virtualPredicates, true); - + if(query.isQueryResultStar()) { + query.getProjectVars().addAll(query.getResultVars().stream().map(Var::alloc).collect(Collectors.toList())); + query.setQueryResultStar(false); + System.out.println(query); + } + + Query intermediateQuery = ElementTransformVirtualPredicates.transform(query, virtualPredicates, true); + Op op = Algebra.compile(intermediateQuery); Context ctx = ARQ.getContext().copy(); @@ -104,13 +106,13 @@ public void test() { // ctx.put(ARQ.optFilterExpandOneOf, false); ctx.put(ARQ.optFilterPlacement, true); ctx.put(ARQ.optFilterPlacementBGP, true); - + // TODO Implement rewrite to pull up // extends over joins (join(..., extends(...), ...) -> extends(join(...)) // Then apply merge BGP //ctx.put(ARQ.opt); - - + + //op = Optimize.optimize(op, ctx); System.out.println(op); @@ -119,12 +121,12 @@ public void test() { System.out.println("Rewritten query: " + finalQuery); System.out.println(ResultSetFormatter.asText( - FluentQueryExecutionFactory - .from(model).create().createQueryExecution(finalQuery).execSelect())); - + FluentQueryExecutionFactory + .from(model).create().createQueryExecution(finalQuery).execSelect())); + } - - + + //virtualPredicates.put(NodeFactory.createURI("http://ex.org/label"), Relation.create("GRAPH ?g { ?s ?p ?o } . ?g ?o", "s", "o")); //virtualPredicates.put(NodeFactory.createURI("http://ex.org/label"), Relation.create("?s ?g . ?g ?o", "s", "o")); diff --git a/jena-sparql-api-data-client/pom.xml b/jena-sparql-api-data-client/pom.xml index d7407207d..775f90837 100644 --- a/jena-sparql-api-data-client/pom.xml +++ b/jena-sparql-api-data-client/pom.xml @@ -8,7 +8,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/pom.xml b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/pom.xml index 104ff1476..4fda11c5d 100644 --- a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/pom.xml +++ b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/pom.xml @@ -10,7 +10,7 @@ org.aksw.jena-sparql-api jena-sparql-api-dcat-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/DcatDatasetEntity.java b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/DcatDatasetEntity.java new file mode 100644 index 000000000..cd55e6651 --- /dev/null +++ b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/DcatDatasetEntity.java @@ -0,0 +1,13 @@ +package org.aksw.dcat.jena.domain.api; + +/** + * A resource that holds information about how to transform a dataset + * + * @author raven + * + */ +public interface DcatDatasetEntity + extends MavenEntity, DcatDataset +{ + +} diff --git a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/DcatEntityCore.java b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/DcatEntityCore.java index 018afaada..ea57b85ab 100644 --- 
a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/DcatEntityCore.java +++ b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/DcatEntityCore.java @@ -3,14 +3,17 @@ public interface DcatEntityCore { // String getCkanId(); // void setCkanId(String id); - - // Name is a public identifier; id is a internal identifier (e.g. ckan) - String getIdentifier(); - void setIdentifier(String name); - String getTitle(); - void setTitle(String title); + String getIdentifier(); + void setIdentifier(String name); - String getDescription(); - void setDescription(String description); + /** A local name such as a human readable string in a CKAN catalog */ +// String getLocalName(); +// void setLocalName(String name); + + String getTitle(); + void setTitle(String title); + + String getDescription(); + void setDescription(String description); } diff --git a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MvnEntity.java b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MavenEntity.java similarity index 72% rename from jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MvnEntity.java rename to jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MavenEntity.java index ecbad94e8..90b6b87e4 100644 --- a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MvnEntity.java +++ b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MavenEntity.java @@ -13,22 +13,22 @@ * */ @ResourceView -public interface MvnEntity - extends Resource, MvnEntityCore +public interface MavenEntity + extends Resource, MavenEntityCore { @Iri("http://dataid.dbpedia.org/ns/core#group") String getGroupId(); - MvnEntity setGroupId(String groupId); + MavenEntity setGroupId(String groupId); @Iri("http://dataid.dbpedia.org/ns/core#artifact") String getArtifactId(); - MvnEntity setArtifactId(String artifactId); + MavenEntity setArtifactId(String artifactId); @IriNs("mvn") String getVersion(); - MvnEntity setVersion(String version); + MavenEntity setVersion(String version); @IriNs("mvn") String getClassifier(); - MvnEntity setClassifier(String classifier); + MavenEntity setClassifier(String classifier); } diff --git a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MavenEntityCore.java b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MavenEntityCore.java new file mode 100644 index 000000000..af7242ef1 --- /dev/null +++ b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MavenEntityCore.java @@ -0,0 +1,15 @@ +package org.aksw.dcat.jena.domain.api; + +public interface MavenEntityCore { + String getGroupId(); + MavenEntityCore setGroupId(String groupId); + + String getArtifactId(); + MavenEntityCore setArtifactId(String artifactId); + + String getVersion(); + MavenEntityCore setVersion(String version); + + String getClassifier(); + MavenEntityCore setClassifier(String classifier); +} diff --git a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MvnEntityCore.java b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MvnEntityCore.java deleted file mode 
100644 index a0f4e6843..000000000 --- a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/api/MvnEntityCore.java +++ /dev/null @@ -1,15 +0,0 @@ -package org.aksw.dcat.jena.domain.api; - -public interface MvnEntityCore { - String getGroupId(); - MvnEntityCore setGroupId(String groupId); - - String getArtifactId(); - MvnEntityCore setArtifactId(String artifactId); - - String getVersion(); - MvnEntityCore setVersion(String version); - - String getClassifier(); - MvnEntityCore setClassifier(String classifier); -} diff --git a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/impl/DcatEntityImpl.java b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/impl/DcatEntityImpl.java index c55eccff3..5244ebd33 100644 --- a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/impl/DcatEntityImpl.java +++ b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/domain/impl/DcatEntityImpl.java @@ -9,63 +9,61 @@ import org.apache.jena.vocabulary.DCTerms; public class DcatEntityImpl - extends ResourceImpl - implements DcatEntity + extends ResourceImpl + implements DcatEntity { - public DcatEntityImpl(Node node, EnhGraph graph) { - super(node, graph); - } - - - @Override - public Resource asResource() { - return new ResourceImpl(this.node, this.enhGraph); - } + public DcatEntityImpl(Node node, EnhGraph graph) { + super(node, graph); + } + + + @Override + public Resource asResource() { + return new ResourceImpl(this.node, this.enhGraph); + } // @Override // public String getCkanId() { // String result = ResourceUtils.getLiteralValue(this, DCTerms.identifier, Literal::getString).orElse(null); // return result; // } -// +// // @Override // public void setCkanId(String identifier) { // ResourceUtils.setLiteralValue(this, DCTerms.identifier, String.class, identifier); // } - - @Override - public String getIdentifier() { - String result = ResourceUtils.getLiteralPropertyValue(this, DCTerms.identifier, String.class); - return result; - } - - @Override - public void setIdentifier(String identifier) { - ResourceUtils.setLiteralProperty(this, DCTerms.identifier, identifier); - } - @Override - public String getTitle() { - String result = ResourceUtils.getLiteralPropertyValue(this, DCTerms.title, String.class); - return result; - } + @Override + public String getIdentifier() { + String result = ResourceUtils.getLiteralPropertyValue(this, DCTerms.identifier, String.class); + return result; + } + + @Override + public void setIdentifier(String identifier) { + ResourceUtils.setLiteralProperty(this, DCTerms.identifier, identifier); + } + + @Override + public String getTitle() { + String result = ResourceUtils.getLiteralPropertyValue(this, DCTerms.title, String.class); + return result; + } - @Override - public String getDescription() { - String result = ResourceUtils.getLiteralPropertyValue(this, DCTerms.description, String.class); - return result; - } + @Override + public String getDescription() { + String result = ResourceUtils.getLiteralPropertyValue(this, DCTerms.description, String.class); + return result; + } - @Override - public void setTitle(String title) { - ResourceUtils.setLiteralProperty(this, DCTerms.title, title); - } + @Override + public void setTitle(String title) { + ResourceUtils.setLiteralProperty(this, DCTerms.title, title); + } - @Override - public void setDescription(String description) { - 
ResourceUtils.setLiteralProperty(this, DCTerms.description, description); - } - - + @Override + public void setDescription(String description) { + ResourceUtils.setLiteralProperty(this, DCTerms.description, description); + } } diff --git a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/plugin/JenaPluginDcat.java b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/plugin/JenaPluginDcat.java index af5880d6f..815aa40ff 100644 --- a/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/plugin/JenaPluginDcat.java +++ b/jena-sparql-api-dcat-parent/jena-sparql-api-dcat-api/src/main/java/org/aksw/dcat/jena/plugin/JenaPluginDcat.java @@ -2,7 +2,7 @@ import org.aksw.dcat.jena.domain.api.DcatDataset; import org.aksw.dcat.jena.domain.api.DcatDistribution; -import org.aksw.dcat.jena.domain.api.MvnEntity; +import org.aksw.dcat.jena.domain.api.MavenEntity; import org.aksw.dcat.jena.domain.impl.DatasetImpl; import org.aksw.dcat.jena.domain.impl.DcatDistributionImpl; import org.aksw.jena_sparql_api.mapper.proxy.JenaPluginUtils; @@ -28,7 +28,7 @@ public static void init() { } public static void init(Personality p) { - JenaPluginUtils.registerResourceClasses(MvnEntity.class); + JenaPluginUtils.registerResourceClasses(MavenEntity.class); p.add(DcatDataset.class, new SimpleImplementation(DatasetImpl::new)); p.add(DcatDistribution.class, new SimpleImplementation(DcatDistributionImpl::new)); } diff --git a/jena-sparql-api-dcat-parent/pom.xml b/jena-sparql-api-dcat-parent/pom.xml index 8bbfdfdd0..8ec490065 100644 --- a/jena-sparql-api-dcat-parent/pom.xml +++ b/jena-sparql-api-dcat-parent/pom.xml @@ -8,7 +8,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-example-proxy/pom.xml b/jena-sparql-api-example-proxy/pom.xml index 9c1bb35b0..748c621bc 100644 --- a/jena-sparql-api-example-proxy/pom.xml +++ b/jena-sparql-api-example-proxy/pom.xml @@ -11,7 +11,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-example-views/pom.xml b/jena-sparql-api-example-views/pom.xml index eeebaae48..691cededb 100644 --- a/jena-sparql-api-example-views/pom.xml +++ b/jena-sparql-api-example-views/pom.xml @@ -8,7 +8,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-geo/pom.xml b/jena-sparql-api-geo/pom.xml index 89aa8773d..6cf498663 100644 --- a/jena-sparql-api-geo/pom.xml +++ b/jena-sparql-api-geo/pom.xml @@ -11,7 +11,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-io-core/pom.xml b/jena-sparql-api-io-core/pom.xml index 19326b6b2..a6fedfd8b 100644 --- a/jena-sparql-api-io-core/pom.xml +++ b/jena-sparql-api-io-core/pom.xml @@ -12,7 +12,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BinarySearchOnBlockSource.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BinarySearchOnBlockSource.java index 9564c3c61..447996d4b 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BinarySearchOnBlockSource.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BinarySearchOnBlockSource.java @@ -27,7 +27,7 @@ public InputStream search(byte[] prefix) throws IOException { long maxBlockOffset = blockSource.size(); - Reference 
blockRef; + Reference blockRef; if(prefix == null || prefix.length == 0) { blockRef = blockSource.contentAtOrAfter(0, true); } else { diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/Block.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/Block.java index 537972b32..59e7c16d3 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/Block.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/Block.java @@ -45,8 +45,8 @@ public interface Block */ // Block getBlockSource(); - Reference nextBlock() throws IOException; - Reference prevBlock() throws IOException; + Reference nextBlock() throws IOException; + Reference prevBlock() throws IOException; /** * Check if there is a subsequent block. diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockIterState.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockIterState.java index c5596b896..aff84feed 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockIterState.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockIterState.java @@ -23,7 +23,7 @@ public class BlockIterState { // implements Iterator { // protected OpenBlock current; - public Reference blockRef; + public Reference blockRef; public Block block; public Seekable seekable; @@ -32,10 +32,10 @@ public class BlockIterState { protected boolean skipFirstClose; protected boolean isFwd; - public BlockIterState(boolean yieldSelf, Reference blockRef, Seekable seekable, boolean isFwd) { + public BlockIterState(boolean yieldSelf, Reference blockRef, Seekable seekable, boolean isFwd) { // this.current = new OpenBlock(blockRef, seekable); - Objects.requireNonNull(blockRef); - + Objects.requireNonNull(blockRef); + this.blockRef = blockRef; this.block = blockRef.get(); this.seekable = seekable; @@ -45,15 +45,15 @@ public BlockIterState(boolean yieldSelf, Reference blockRef, Seekable see this.isFwd = isFwd; } - public static BlockIterState fwd(boolean yieldSelf, Reference blockRef, Seekable seekable) { + public static BlockIterState fwd(boolean yieldSelf, Reference blockRef, Seekable seekable) { return new BlockIterState(yieldSelf, blockRef, seekable, true); } - public static BlockIterState fwd(boolean yieldSelf, Reference blockRef) { + public static BlockIterState fwd(boolean yieldSelf, Reference blockRef) { return new BlockIterState(yieldSelf, blockRef, blockRef.get().newChannel(), true); } - public static BlockIterState bwd(boolean yieldSelf, Reference blockRef, Seekable seekable) { + public static BlockIterState bwd(boolean yieldSelf, Reference blockRef, Seekable seekable) { return new BlockIterState(yieldSelf, blockRef, seekable, false); } @@ -90,7 +90,7 @@ public void advance() { if(yieldSelf) { yieldSelf = false; } else { - Reference next = isFwd + Reference next = isFwd ? 
block.nextBlock() : block.prevBlock(); diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockSource.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockSource.java index 498709cf2..ff5ae2b9c 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockSource.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockSource.java @@ -5,8 +5,8 @@ import org.aksw.jena_sparql_api.io.common.Reference; public interface BlockSource { - Reference contentAtOrBefore(long pos, boolean inclusive) throws IOException; - Reference contentAtOrAfter(long pos, boolean inclusive) throws IOException; + Reference contentAtOrBefore(long pos, boolean inclusive) throws IOException; + Reference contentAtOrAfter(long pos, boolean inclusive) throws IOException; boolean hasBlockAfter(long pos) throws IOException; boolean hasBlockBefore(long pos) throws IOException; @@ -17,5 +17,12 @@ public interface BlockSource { // ByteBuffer lastContent(); // ByteBuffer getChannelForPos(long pos) throws IOException; + + /** + * Return the number of valid positions within which blocks can be searched + * + * @return + * @throws IOException + */ long size() throws IOException; } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockSources.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockSources.java index c77107dda..14c64d271 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockSources.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/BlockSources.java @@ -26,6 +26,19 @@ public static BinarySearcher createBinarySearcherBz2(FileChannel fileChannel, bo return result; } + + public static BinarySearcher createBinarySearcherText(Path path) throws IOException { + FileChannel channel = FileChannel.open(path, StandardOpenOption.READ); + BinarySearcher result = createBinarySearcherText(channel, true); + return result; + } + + public static BinarySearcher createBinarySearcherText(FileChannel fileChannel, boolean closeChannel) throws IOException { + PageManager pageManager = PageManagerForFileChannel.create(fileChannel); + BinarySearcher result = new BinarySearchOnBlockSource(pageManager, closeChannel ?
fileChannel::close : null); + return result; + } + /** * Binary search over blocks * @@ -39,20 +52,20 @@ public static BinarySearcher createBinarySearcherBz2(FileChannel fileChannel, bo * @return A reference to a block that may contain the key or null if no candidate block was found * @throws Exception */ - public static Reference binarySearch(BlockSource blockSource, long min, long max, byte delimiter, byte[] prefix) throws IOException { + public static Reference binarySearch(BlockSource blockSource, long min, long max, byte delimiter, byte[] prefix) throws IOException { // System.out.println("[" + min + ", " + max + "["); if(min >= max) { return null; } - Reference result; + Reference result; long middlePos = (min + max) >> 1; // fast divide by 2 // Find the start of the record in the block: // In the first block, this is position 0 // otherwise this is the first delimiter - Reference blockRef = blockSource.contentAtOrBefore(middlePos, true); + Reference blockRef = blockSource.contentAtOrBefore(middlePos, true); if(blockRef == null) { return null; //Long.MIN_VALUE; } @@ -102,7 +115,7 @@ public static Reference binarySearch(BlockSource blockSource, long min, l // the search key may still be contained in this block // but check the upper half of the search range if there is another block //long lookupPos = pos + 1; - try(Reference nextBlockRef = blockSource.contentAtOrAfter(pos, false)) { + try(Reference nextBlockRef = blockSource.contentAtOrAfter(pos, false)) { // If there is no further block it implies we are in the last block if(nextBlockRef == null) { diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/DecodedDataBlock.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/DecodedDataBlock.java index f418ad67f..f47a29331 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/DecodedDataBlock.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/DecodedDataBlock.java @@ -25,12 +25,12 @@ public boolean hasPrev() throws IOException { } @Override - public Reference nextBlock() throws IOException { + public Reference nextBlock() throws IOException { return blockSource.contentAtOrAfter(blockStart, false); } @Override - public Reference prevBlock() throws IOException { + public Reference prevBlock() throws IOException { return blockSource.contentAtOrBefore(blockStart, false); } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/GraphFromPrefixMatcher.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/GraphFromPrefixMatcher.java index 1a26b336f..a08b6b41f 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/GraphFromPrefixMatcher.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/GraphFromPrefixMatcher.java @@ -1,5 +1,6 @@ package org.aksw.jena_sparql_api.io.binseach; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -102,9 +103,9 @@ protected ExtendedIterator graphBaseFindCore(Triple triplePattern) throw Node s = triplePattern.getSubject(); if(s.equals(Node.ANY) || s.isVariable()) { prefix = ""; - } else if(s.isBlank()) { + } else if (s.isBlank()) { prefix = "_:" + s.getBlankNodeLabel(); - } else if(s.isURI() ){ + } else if (s.isURI() ){ prefix = "<" + s.getURI() + ">"; } else { // Literal in subject position - skip @@ -113,7 +114,9 @@ protected 
ExtendedIterator graphBaseFindCore(Triple triplePattern) throw // System.out.println("PREFIX: " + prefix); - InputStream in = binarySearcher.search(prefix); + InputStream in = prefix == null + ? new ByteArrayInputStream(new byte[0]) + : binarySearcher.search(prefix); Stream baseStream = Streams.stream( //RDFDataMgrRx.createIteratorTriples(in, Lang.NTRIPLES, "http://www.example.org/")); diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/MainPlaygroundScanFile.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/MainPlaygroundScanFile.java index 12609acdb..fc503d84f 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/MainPlaygroundScanFile.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/MainPlaygroundScanFile.java @@ -163,14 +163,14 @@ public static void mainBz2Decode(String[] args) throws Exception { try(FileChannel fileChannel = FileChannel.open(path, StandardOpenOption.READ)) { PageManager pageManager = PageManagerForFileChannel.create(fileChannel); - long maxBlockOffset = pageManager.getEndPos(); + long maxBlockOffset = pageManager.size(); SeekableSource pagedSource = new SeekableSourceFromPageManager(pageManager); BlockSource blockSource = BlockSourceBzip2.create(pagedSource); byte[] prefix = "".getBytes(); - Reference blockRef = BlockSources.binarySearch(blockSource, 0, maxBlockOffset, (byte)'\n', prefix); + Reference blockRef = BlockSources.binarySearch(blockSource, 0, maxBlockOffset, (byte)'\n', prefix); if(blockRef == null) { System.out.println("No match found"); return; diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/Page.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/Page.java index 0634c06a9..3990be762 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/Page.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/Page.java @@ -1,7 +1,10 @@ package org.aksw.jena_sparql_api.io.binseach; +import java.io.IOException; import java.nio.ByteBuffer; +import org.aksw.jena_sparql_api.io.common.Reference; + /** * A page is a fixed size sequence of bytes obtained from a page manager backed by a ByteBuffer. * Only the last page may have a smaller size than the others. @@ -18,8 +21,9 @@ * */ public interface Page + extends Block { - long getId(); + long getOffset(); PageManager getPageManager(); @@ -33,12 +37,36 @@ public interface Page */ ByteBuffer newBuffer(); + default Reference prevBlock() throws IOException { + return getPageManager().contentAtOrBefore(getOffset(), false); + } + + default Reference nextBlock() throws IOException { + return getPageManager().contentAtOrAfter(getOffset(), false); + } + + /** - * Release the page. - * No ByteBuffer obtained from this page should be used anymore + * Check if there is a subsequent block. 
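Editor's sketch (not part of the commit): the prefix computation in GraphFromPrefixMatcher above serializes a concrete subject exactly as it appears at the start of a line, so a lookup against a lexicographically sorted N-Triples file reduces to a prefix search. The same flow via the new createBinarySearcherText factory; the file path and subject are made up, and the package of BinarySearcher is assumed from the surrounding diff.

import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import org.aksw.jena_sparql_api.io.binseach.BinarySearcher;
import org.aksw.jena_sparql_api.io.binseach.BlockSources;

public class BinSearchExample {
    public static void main(String[] args) throws Exception {
        BinarySearcher searcher = BlockSources.createBinarySearcherText(Paths.get("/tmp/sorted-data.nt"));
        // Subject serialized as it appears at the start of each N-Triples line
        byte[] prefix = "<http://www.example.org/s1>".getBytes(StandardCharsets.UTF_8);
        try (InputStream in = searcher.search(prefix)) {
            // 'in' streams exactly those lines that start with the given prefix
            System.out.println(new String(in.readAllBytes(), StandardCharsets.UTF_8));
        }
    }
}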
* + * @return + * @throws IOException */ -// void release(); -// -// boolean isReleased(); + default boolean hasNext() throws IOException { + return getPageManager().hasBlockAfter(getOffset()); + } + + default boolean hasPrev() throws IOException { + return getPageManager().hasBlockBefore(getOffset()); + } + + default long length() throws IOException { + return getPageManager().getSizeOfBlock(getOffset()); + } + + @Override + default Seekable newChannel() { + ByteBuffer buf = newBuffer(); + return new PageNavigator(new PageManagerForByteBuffer(buf)); + } } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageBase.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageBase.java index 6c801e101..9b9d864dc 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageBase.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageBase.java @@ -2,50 +2,54 @@ import java.nio.ByteBuffer; -public class PageBase - implements Page +public class PageBase + implements Page { - protected PageManager pageManager; - protected long id; - protected ByteBuffer originalBuffer; - //Reference baseBufferRef; - //protected boolean isReleased; - - public PageBase(PageManager pageManager, long id, ByteBuffer originalBuffer) { - super(); - this.pageManager = pageManager; - this.id = id; - this.originalBuffer = originalBuffer; + protected PageManager pageManager; + protected long id; + protected ByteBuffer originalBuffer; + //Reference baseBufferRef; + //protected boolean isReleased; + + public PageBase(PageManager pageManager, long id, ByteBuffer originalBuffer) { + super(); + this.pageManager = pageManager; + this.id = id; + this.originalBuffer = originalBuffer; // this.baseBuffer = baseBuffer; // this.isReleased = false; - } - - @Override - public long getId() { - return id; - } - - @Override - public PageManager getPageManager() { - return pageManager; - } - - @Override - public ByteBuffer newBuffer() { - ByteBuffer result = originalBuffer.duplicate(); - return result; - } - + } + + @Override + public long getOffset() { + return id; + } + + @Override + public PageManager getPageManager() { + return pageManager; + } + + @Override + public ByteBuffer newBuffer() { + ByteBuffer result = originalBuffer.duplicate(); + return result; + } + + @Override + public void close() throws Exception { + } + // @Override // public void release() { // baseBufferRef.release(); // } -// +// // @Override // public boolean isReleased() { // boolean result = baseBufferRef.isReleased(); // return result; // } - - //public abstract void doRelease(); + + //public abstract void doRelease(); } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManager.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManager.java index 55b11966e..ac6fef61d 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManager.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManager.java @@ -1,20 +1,95 @@ package org.aksw.jena_sparql_api.io.binseach; +import java.io.IOException; + import org.aksw.jena_sparql_api.io.common.Reference; -public interface PageManager{ - Reference requestBufferForPage(long page); - - //ByteBuffer requestBufferForPage(long page); - - /** - * The pageSize. Must never change during the life time of a page manager.
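Editor's sketch (not part of the commit): with Page now implementing Block (above), a page manager can be walked like any other block source using the new defaults. The generic parameters of Reference are elided in this rendering of the diff, so a raw type and a cast stand in for them here; the method name is illustrative.

static long totalPageBytes(PageManager pageManager) throws Exception {
    long total = 0;
    Reference ref = pageManager.contentAtOrAfter(0, true); // first page, viewed as a Block
    while (ref != null) {
        Block block = (Block) ref.get();
        total += block.length(); // delegates to getSizeOfBlock(getOffset())
        Reference next = block.hasNext() ? block.nextBlock() : null;
        ref.close(); // release the current page reference before moving on
        ref = next;
    }
    return total;
}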
-     *
-     * @return
-     */
-    int getPageSize();
-
-
-    long getEndPos();
-    // TODO Add a release mechanism
+
+/**
+ * A PageSource (TODO change to that naming) is a special kind of BlockSource where all blocks have the same size
+ * and there is a 1:1 correspondence between pages and non-overlapping regions in the underlying buffer.
+ *
+ * Consecutive page ids do not necessarily have to refer to consecutive regions.
+ *
+ * @author raven
+ *
+ */
+public interface PageManager
+    extends BlockSource
+{
+    Reference requestBufferForPage(long page);
+
+    //ByteBuffer requestBufferForPage(long page);
+
+    /**
+     * The pageSize. Must never change during the life time of a page manager.
+     *
+     * @return
+     */
+    int getPageSize();
+
+    /**
+     * The maximum position in the underlying buffer
+     *
+     * @return
+     */
+    long getEndPos();
+
+
+    /**
+     * Retrieve the number of available pages
+     *
+     */
+    default long size() {
+        long endPos = getEndPos();
+        int pageSize = getPageSize();
+        long result = endPos / pageSize + 1 - (endPos % pageSize == 0 ? 1 : 0);
+        return result;
+    }
+
+
+    default Reference contentAtOrBefore(long pos, boolean inclusive) throws IOException {
+        Reference result = inclusive
+            ? requestBufferForPage(pos)
+            : (hasBlockBefore(pos) ? requestBufferForPage(pos - 1) : null);
+        return result;
+    }
+
+    default Reference contentAtOrAfter(long pos, boolean inclusive) throws IOException {
+        Reference result = inclusive
+            ? requestBufferForPage(pos)
+            : (hasBlockAfter(pos) ? requestBufferForPage(pos + 1) : null);
+        return result;
+    }
+
+    default boolean hasBlockAfter(long pos) throws IOException {
+        long pageCount = size();
+        boolean result = pos >= -1 && pos < pageCount - 1;
+        return result;
+    }
+
+    default boolean hasBlockBefore(long pos) throws IOException {
+        long pageCount = size();
+        boolean result = pos == pageCount || pos > 0;
+        return result;
+    }
+
+    /**
+     * Return the size of the block; all but the last block are guaranteed to have the same size
+     *
+     */
+    default long getSizeOfBlock(long pos) throws IOException {
+        int pageSize = getPageSize();
+        long lastIndex = size() - 1;
+
+        long endPos = getEndPos();
+
+        long result = pos < lastIndex
+            ? pageSize
+            : (pos == lastIndex ?
endPos % pageSize: 0); + + return result; + } + } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerForByteBuffer.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerForByteBuffer.java index c282afee3..da5e4e6ce 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerForByteBuffer.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerForByteBuffer.java @@ -7,36 +7,36 @@ /** * PageManager wrapper for a static buffer - * + * * @author raven * */ public class PageManagerForByteBuffer - implements PageManager + implements PageManager { // protected ByteBuffer staticBuffer; - protected long pageForBuf; - protected ByteBuffer staticBuffer; - - public PageManagerForByteBuffer(ByteBuffer staticBuffer) { - this.staticBuffer = staticBuffer; - //this.staticBuffer = staticBuffer; - this.pageForBuf = 0; - } + protected long pageForBuf; + protected ByteBuffer staticBuffer; + + public PageManagerForByteBuffer(ByteBuffer staticBuffer) { + this.staticBuffer = staticBuffer; + //this.staticBuffer = staticBuffer; + this.pageForBuf = 0; + } - @Override - public Reference requestBufferForPage(long page) { - Page staticPage = new PageBase(this, 0, staticBuffer); - return ReferenceImpl.create(staticPage, null, "Reference to static page"); - } + @Override + public Reference requestBufferForPage(long page) { + Page staticPage = new PageBase(this, 0, staticBuffer); + return ReferenceImpl.create(staticPage, null, "Reference to static page"); + } - @Override - public int getPageSize() { - return staticBuffer.remaining(); - } + @Override + public int getPageSize() { + return staticBuffer.remaining(); + } - @Override - public long getEndPos() { - return staticBuffer.remaining(); - } + @Override + public long getEndPos() { + return staticBuffer.remaining(); + } } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerForFileChannel.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerForFileChannel.java index 75ea61503..b1558faf4 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerForFileChannel.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerForFileChannel.java @@ -79,11 +79,6 @@ public Reference requestBufferForPage(long page) { return result; } - @Override - public long getEndPos() { - return channelSize; - } - public synchronized Reference getRefForPage(long page) throws IOException { long start = page * pageSize; long end = Math.min(channelSize, start + pageSize); @@ -122,4 +117,9 @@ public synchronized Reference getRefForPage(long page) throws IOException return result; } + + @Override + public long getEndPos() { + return channelSize; + } } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerWrapper.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerWrapper.java index 8f633722f..1a8a36a22 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerWrapper.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageManagerWrapper.java @@ -8,130 +8,130 @@ /** * A wrapper that virtually puts a displaced page view over a delegate - * + * * There must be a 1:1 correspondence between page and byte buffer. 
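The size() and getSizeOfBlock(...) defaults above come down to ceiling division over a fixed page size. A self-contained sketch of the same arithmetic with ad-hoc names, including the edge case where the end position is an exact multiple of the page size:

public class PageMath {
    /** Number of fixed-size pages needed to cover endPos bytes (ceiling division). */
    static long pageCount(long endPos, int pageSize) {
        return (endPos + pageSize - 1) / pageSize;
    }

    /** Byte length of the page at pageIndex; only the last page may be shorter. */
    static long pageLength(long pageIndex, long endPos, int pageSize) {
        long lastIndex = pageCount(endPos, pageSize) - 1;
        if (pageIndex < lastIndex) {
            return pageSize;
        }
        // endPos - lastIndex * pageSize yields pageSize (not 0) for a full last page
        return pageIndex == lastIndex ? endPos - lastIndex * (long) pageSize : 0;
    }

    public static void main(String[] args) {
        System.out.println(pageCount(10, 4));     // 3 pages: 4 + 4 + 2 bytes
        System.out.println(pageLength(2, 10, 4)); // 2
        System.out.println(pageLength(1, 8, 4));  // 4: last page of an exact multiple is full
    }
}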
* Hence, if a virtual page stretches over multiple physical ones, the data is copied * into a buffer of sufficient size. - * - * + * + * * view: displacement [ ] [ ] [ ] [ ] [ ] [ ] - * delegate: [ p1 ] [ p2 ] [ p3 ] [ p4 ] - * + * delegate: [ p1 ] [ p2 ] [ p3 ] [ p4 ] + * * @author raven * */ public class PageManagerWrapper - implements PageManager + implements PageManager { - protected PageManager delegate; - protected long displacement; - protected int virtPageSize; - - public PageManagerWrapper(PageManager delegate, long displacement, int pageSize) { - super(); - this.delegate = delegate; - this.displacement = displacement; - this.virtPageSize = pageSize; - } - - @Override - public Reference requestBufferForPage(long page) { - int physPageSize = delegate.getPageSize(); - - //page * pageSize; - long effPos = page * virtPageSize - displacement; - long effPage = effPos / physPageSize; - int effIndex = (int)effPos % physPageSize; - - long effEndPos = effPos + virtPageSize; - long effEndPage = effEndPos / physPageSize; - int effEndIndex = (int)effEndPos % physPageSize; - - - ByteBuffer resultBuffer; - Reference delegatePage; - if(effPage == effEndPage) { - delegatePage = delegate.requestBufferForPage(effPage); - ByteBuffer buf = delegatePage.get().newBuffer(); - if(buf == null) { - resultBuffer = null; - } else { - int o = buf.position(); - //if(buf.remaining() > virtPageSize) { - // We expect the page to have sufficient size - resultBuffer = buf.duplicate(); - + protected PageManager delegate; + protected long displacement; + protected int virtPageSize; + + public PageManagerWrapper(PageManager delegate, long displacement, int pageSize) { + super(); + this.delegate = delegate; + this.displacement = displacement; + this.virtPageSize = pageSize; + } + + @Override + public Reference requestBufferForPage(long page) { + int physPageSize = delegate.getPageSize(); + + //page * pageSize; + long effPos = page * virtPageSize - displacement; + long effPage = effPos / physPageSize; + int effIndex = (int)effPos % physPageSize; + + long effEndPos = effPos + virtPageSize; + long effEndPage = effEndPos / physPageSize; + int effEndIndex = (int)effEndPos % physPageSize; + + + ByteBuffer resultBuffer; + Reference delegatePage; + if(effPage == effEndPage) { + delegatePage = delegate.requestBufferForPage(effPage); + ByteBuffer buf = delegatePage.get().newBuffer(); + if(buf == null) { + resultBuffer = null; + } else { + int o = buf.position(); + //if(buf.remaining() > virtPageSize) { + // We expect the page to have sufficient size + resultBuffer = buf.duplicate(); + // int start = o + effIndex; // if(start < 0) { // // create a new buffer and pad -// +// // } - - resultBuffer.position(o + effIndex); - resultBuffer.limit(o + effEndIndex); - } - //} - } else { - byte[] cpy = new byte[virtPageSize]; - resultBuffer = ByteBuffer.wrap(cpy); - - for(long i = effPage;; ++i) { - delegatePage = delegate.requestBufferForPage(i); - try { - ByteBuffer buf = delegatePage.get().newBuffer(); - if(buf != null) { - int o = buf.position(); - - buf = buf.duplicate(); - int index = i == effPage ? 
effIndex : 0; - buf.position(o + index); - - //int x = buf.remaining(); - int take = Math.min(buf.remaining(), resultBuffer.remaining()); - buf.limit(buf.position() + take); - resultBuffer.put(buf); - - if(resultBuffer.remaining() == 0) { - resultBuffer.position(0); - break; - } - } else { - break; - } - } finally { - try { - delegatePage.close(); - } catch(Exception e) { - throw new RuntimeException(e); - } - } - } - - - } - - // This is pretty hacky: - // We pass a dummy reference to the buffer, but the actual release happens on the - // local delPage attribute - - Reference delPage = delegatePage; - Page tmp = new PageBase(this, page, resultBuffer); - Reference result = ReferenceImpl.create(tmp, () -> { - if(delPage != null) { - delPage.close(); - } - }, null); - - return result; - } - - @Override - public int getPageSize() { - return virtPageSize; - } - @Override - public long getEndPos() { - long result = delegate.getEndPos(); - return result; - } + + resultBuffer.position(o + effIndex); + resultBuffer.limit(o + effEndIndex); + } + //} + } else { + byte[] cpy = new byte[virtPageSize]; + resultBuffer = ByteBuffer.wrap(cpy); + + for(long i = effPage;; ++i) { + delegatePage = delegate.requestBufferForPage(i); + try { + ByteBuffer buf = delegatePage.get().newBuffer(); + if(buf != null) { + int o = buf.position(); + + buf = buf.duplicate(); + int index = i == effPage ? effIndex : 0; + buf.position(o + index); + + //int x = buf.remaining(); + int take = Math.min(buf.remaining(), resultBuffer.remaining()); + buf.limit(buf.position() + take); + resultBuffer.put(buf); + + if(resultBuffer.remaining() == 0) { + resultBuffer.position(0); + break; + } + } else { + break; + } + } finally { + try { + delegatePage.close(); + } catch(Exception e) { + throw new RuntimeException(e); + } + } + } + + + } + + // This is pretty hacky: + // We pass a dummy reference to the buffer, but the actual release happens on the + // local delPage attribute + + Reference delPage = delegatePage; + Page tmp = new PageBase(this, page, resultBuffer); + Reference result = ReferenceImpl.create(tmp, () -> { + if(delPage != null) { + delPage.close(); + } + }, null); + + return result; + } + + @Override + public int getPageSize() { + return virtPageSize; + } + + @Override + public long getEndPos() { + return delegate.getEndPos(); + } } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageNavigator.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageNavigator.java index 393148e74..6a0a93a45 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageNavigator.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/PageNavigator.java @@ -39,7 +39,7 @@ public class PageNavigator * Initialization happens in getBufferForPage() */ - protected Reference pageObj = null; + protected Reference pageObj = null; protected ByteBuffer pageBuffer = null; protected int displacement; @@ -237,7 +237,7 @@ public ByteBuffer getBufferForPage(long page) throws IOException { displacement = 0; } - // updateRelCache(page); + updateRelCache(page); absMinIndexInPage = displacement + relMinIndexInPage; absMaxIndexInPage = displacement + relMaxIndexInPage; @@ -490,7 +490,7 @@ public boolean prevPos(int delta) throws IOException { tgtPage = page - 1; tgtIndex = pageSize - (relMinIndexInPage - prevIndex); } else { - long p = getPos() + delta; + long p = getPos() - delta; tgtPage = getPageForPos(p); tgtIndex = 
getIndexForPos(p); } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/ReverseCharSequenceFromSeekable.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/ReverseCharSequenceFromSeekable.java index 777455994..3b4be1667 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/ReverseCharSequenceFromSeekable.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/ReverseCharSequenceFromSeekable.java @@ -28,6 +28,9 @@ public int length() { @Override public char charAt(int index) { +// if(index == 16777673) { +// System.err.println("DEBUG POiNT"); +// } try { int p = offset + index; seekable.prevPos(p); diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/SeekableFromBlock.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/SeekableFromBlock.java index 757fdff88..f58972c5d 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/SeekableFromBlock.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/SeekableFromBlock.java @@ -11,11 +11,11 @@ // Combine reference to a block with a channel class OpenBlock{ - public Reference blockRef; + public Reference blockRef; public Block block; public Seekable seekable; - public OpenBlock(Reference blockRef, Seekable seekable) { + public OpenBlock(Reference blockRef, Seekable seekable) { this.blockRef = blockRef; this.block = blockRef.get(); this.seekable = seekable; @@ -36,7 +36,7 @@ void close() throws IOException { // } // // OpenedBlock closeAndNext() { -// Reference next = blockRef.get().nextBlock(); +// Reference next = blockRef.get().nextBlock(); // f // } } @@ -56,7 +56,7 @@ void close() throws IOException { public class SeekableFromBlock implements Seekable { - protected Reference startBlockRef; + protected Reference startBlockRef; protected int startPosInStartSegment; /** @@ -72,17 +72,17 @@ public class SeekableFromBlock protected long maxPos; protected long minPos; - protected Reference currentBlockRef; + protected Reference currentBlockRef; protected Block currentBlock; // cache of currentBlockRef.get() protected Seekable currentSeekable; // currentBlock.newChannel() protected long actualPos; - public SeekableFromBlock(Reference startBlockRef, int posInStartSegment, long exposedStartPos) { + public SeekableFromBlock(Reference startBlockRef, int posInStartSegment, long exposedStartPos) { this(startBlockRef, posInStartSegment, exposedStartPos, Long.MIN_VALUE, Long.MAX_VALUE); } - public SeekableFromBlock(Reference startBlockRef, int posInStartSegment, long exposedStartPos, long minPos, long maxPos) { + public SeekableFromBlock(Reference startBlockRef, int posInStartSegment, long exposedStartPos, long minPos, long maxPos) { super(); this.startBlockRef = startBlockRef; this.startPosInStartSegment = posInStartSegment; @@ -165,10 +165,23 @@ public void posToStart() throws IOException { // Replace end with the concept of a fwd horizon + // We would have to read the block fully if we wanted to position at the end @Override public void posToEnd() throws IOException { // pos = maxPos; throw new UnsupportedOperationException(); +// try { +// currentBlockRef.close(); +// currentBlockRef = startBlockRef.acquire(null); +// currentBlock = currentBlockRef.get(); +// currentSeekable = currentBlock.newChannel(); +// currentSeekable.posToEnd(); +// actualPos = exposedStartPos; +// } catch (Exception e) { +// 
throw new RuntimeException(e); +// } +// actualPos = maxPos; + } @@ -182,8 +195,8 @@ public boolean isPosBeforeStart() throws IOException { } - Reference openNextCloseCurrent(Reference current, Reference exclude) throws IOException { - Reference result = current.get().nextBlock(); + Reference openNextCloseCurrent(Reference current, Reference exclude) throws IOException { + Reference result = current.get().nextBlock(); try { if(current != exclude) { current.close(); @@ -214,7 +227,7 @@ public boolean isPosAfterEnd() throws IOException { } protected boolean loadNextBlock() throws IOException { - Reference nextBlockRef = currentBlockRef.get().nextBlock(); + Reference nextBlockRef = currentBlockRef.get().nextBlock(); boolean result = nextBlockRef != null; if(result) { @@ -234,7 +247,7 @@ protected boolean loadNextBlock() throws IOException { } // protected boolean loadPrevBlock() throws Exception { -// Reference nextBlockRef = currentBlockRef.get().prevBlock(); +// Reference nextBlockRef = currentBlockRef.get().prevBlock(); // boolean result = nextBlockRef != null; // // if(result) { @@ -260,13 +273,13 @@ protected boolean loadNextBlock() throws IOException { class State { - public State(Reference blockRef, Seekable channel) { + public State(Reference blockRef, Seekable channel) { super(); this.blockRef = blockRef; this.channel = channel; } - Reference blockRef; + Reference blockRef; Seekable channel; } @@ -296,7 +309,7 @@ public int posToNext(byte delimiter, boolean changePos) throws IOException { if(contrib >= 0) { result = contrib; } else { - Reference tmpBlockRef = currentBlockRef; + Reference tmpBlockRef = currentBlockRef; Block tmpBlock = currentBlock; Seekable tmpSeekable = currentSeekable; @@ -305,7 +318,7 @@ public int posToNext(byte delimiter, boolean changePos) throws IOException { posDelta += -contrib + 1; // Check whether there is a successor block - Reference nextBlockRef = contrib > 0 ? null : tmpBlockRef.get().nextBlock(); + Reference nextBlockRef = contrib > 0 ? 
null : tmpBlockRef.get().nextBlock(); if(nextBlockRef == null) { currentBlockRef = tmpBlockRef; @@ -478,7 +491,7 @@ public int checkPrev(int len, boolean changePos) throws IOException { return result; } // int remaining = len - contrib; -// Reference tmpBlockRef = currentBlockRef; +// Reference tmpBlockRef = currentBlockRef; // Block tmpBlock = currentBlock; // Seekable tmpSeekable = currentSeekable; // @@ -487,7 +500,7 @@ public int checkPrev(int len, boolean changePos) throws IOException { // remaining -= contrib; // // // Check whether there is a successor block -// Reference nextBlockRef = tmpBlockRef.get().nextBlock(); +// Reference nextBlockRef = tmpBlockRef.get().nextBlock(); // // if(nextBlockRef == null) { // currentBlockRef = tmpBlockRef; diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/SeekableSourceFromPageManager.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/SeekableSourceFromPageManager.java index 216047be8..5d808d659 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/SeekableSourceFromPageManager.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/SeekableSourceFromPageManager.java @@ -1,31 +1,32 @@ package org.aksw.jena_sparql_api.io.binseach; public class SeekableSourceFromPageManager - implements SeekableSource + implements SeekableSource { - protected PageManager pageManager; + protected PageManager pageManager; - public SeekableSourceFromPageManager(PageManager pageManager) { - super(); - this.pageManager = pageManager; - } + public SeekableSourceFromPageManager(PageManager pageManager) { + super(); + this.pageManager = pageManager; + } - @Override - public boolean supportsAbsolutePosition() { - return true; - } + @Override + public boolean supportsAbsolutePosition() { + return true; + } - @Override - public Seekable get(long pos) { - PageNavigator result = new PageNavigator(pageManager); - result.setPos(pos); - return result; - } + @Override + public Seekable get(long pos) { + PageNavigator result = new PageNavigator(pageManager); - @Override - public long size() { - long result = pageManager.getEndPos(); - return result; - } + result.setPos(pos); + return result; + } + + @Override + public long size() { + long result = pageManager.getEndPos(); + return result; + } } diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/bz2/BlockSourceBzip2.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/bz2/BlockSourceBzip2.java index a27c850a1..35ac71a5e 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/bz2/BlockSourceBzip2.java +++ b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/binseach/bz2/BlockSourceBzip2.java @@ -106,6 +106,7 @@ public Reference contentAtOrBefore(long requestPos, boolean inclusive) th if(result == null) { Seekable seekable = seekableSource.get(internalRequestPos); +// System.out.println("Size: " + seekableSource.size()); // SeekableMatcher matcher = bwdBlockStartMatcherFactory.newMatcher(); diff --git a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/split/InputSplits.java b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/split/InputSplits.java index 8bae169f1..d2acb8a18 100644 --- a/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/split/InputSplits.java +++ 
b/jena-sparql-api-io-core/src/main/java/org/aksw/jena_sparql_api/io/split/InputSplits.java @@ -23,7 +23,7 @@ public static List createInputSplits(BlockSource blockSource) throws Exc long priorOffset = -1; for(int i = 0; i < rawSplits.size(); ++i) { - try(Reference blockRef = blockSource.contentAtOrAfter(i, true)) { + try(Reference blockRef = blockSource.contentAtOrAfter(i, true)) { if(blockRef != null) { // Ensure that no distinct split resolve to the same block - otherwise ignore // those splits that are too close to each other @@ -72,7 +72,7 @@ public static List createInputSplits(FileChannel fileChannel) throws IOE // PageManager pageManager = PageManagerForFileChannel.create(fileChannel, 128 * 1024 * 1024); // PageManager pageManager = new PageManagerForByteBuffer(ByteBuffer.wrap(str.getBytes())); - long size = pageManager.getEndPos(); + long size = pageManager.size(); //long size = fileChannel.size(); int numChunks = 4; //32; boolean fwd = true; diff --git a/jena-sparql-api-io-core/src/test/java/org/aksw/jena_sparql_api/io/binsearch/TestBinSearch.java b/jena-sparql-api-io-core/src/test/java/org/aksw/jena_sparql_api/io/binsearch/TestBinSearch.java index 32873da4f..59ff66cc8 100644 --- a/jena-sparql-api-io-core/src/test/java/org/aksw/jena_sparql_api/io/binsearch/TestBinSearch.java +++ b/jena-sparql-api-io-core/src/test/java/org/aksw/jena_sparql_api/io/binsearch/TestBinSearch.java @@ -44,7 +44,7 @@ public static void listBz2Blocks() throws IOException { if(true) { - Reference block = blockSource.contentAtOrAfter(21133549, true); + Reference block = blockSource.contentAtOrAfter(21133549, true); BlockIterState state = new BlockIterState(true, block, null, true); while(state.hasNext()) { state.advance(); @@ -64,7 +64,7 @@ public static void listBz2Blocks() throws IOException { // 1043168 if(true) { - Reference block = blockSource.contentAtOrBefore(162881, true); + Reference block = blockSource.contentAtOrBefore(162881, true); BlockIterState state = new BlockIterState(true, block, null, false); while(state.hasNext()) { state.advance(); diff --git a/jena-sparql-api-io-core/src/test/java/org/aksw/jena_sparql_api/io/binsearch/TestBinSearchBz2.java b/jena-sparql-api-io-core/src/test/java/org/aksw/jena_sparql_api/io/binsearch/TestBinSearchBz2.java index 78b62ad66..021e59bae 100644 --- a/jena-sparql-api-io-core/src/test/java/org/aksw/jena_sparql_api/io/binsearch/TestBinSearchBz2.java +++ b/jena-sparql-api-io-core/src/test/java/org/aksw/jena_sparql_api/io/binsearch/TestBinSearchBz2.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -19,6 +20,7 @@ import org.aksw.jena_sparql_api.rx.GraphOpsRx; import org.aksw.jena_sparql_api.rx.RDFDataMgrRx; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.io.IOUtils; import org.apache.jena.graph.Graph; import org.apache.jena.graph.Node; import org.apache.jena.riot.Lang; @@ -152,6 +154,17 @@ public static void runTest() throws IOException { logger.debug("Needed " + (sw.elapsed(TimeUnit.MILLISECONDS) * 0.001) + " seconds for " + i + " lookups on " + path); } + + +// @Test + public void testLocalBinSearch() throws IOException, Exception { + try(BinarySearcher bs = BlockSources.createBinarySearcherBz2(Paths.get("/home/raven/tmp/sorttest/dnb-all_lds_20200213.sorted.nt.bz2"))) { + try (InputStream in = bs.search("")) 
{ + System.out.println("Output: " + IOUtils.toString(in, StandardCharsets.UTF_8)); + } + } + } + } //public static void doAssert(BinarySearcher searcher, String key, int expectedLines) throws IOException { diff --git a/jena-sparql-api-io-hdt/pom.xml b/jena-sparql-api-io-hdt/pom.xml index 923832933..2f98efa04 100644 --- a/jena-sparql-api-io-hdt/pom.xml +++ b/jena-sparql-api-io-hdt/pom.xml @@ -13,7 +13,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-json/pom.xml b/jena-sparql-api-json/pom.xml index cfaef77e0..0f48d79b0 100644 --- a/jena-sparql-api-json/pom.xml +++ b/jena-sparql-api-json/pom.xml @@ -13,7 +13,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-mapper-annotations/pom.xml b/jena-sparql-api-mapper-annotations/pom.xml index a9e06c9df..4635ff507 100644 --- a/jena-sparql-api-mapper-annotations/pom.xml +++ b/jena-sparql-api-mapper-annotations/pom.xml @@ -11,7 +11,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-mapper-annotations/src/main/java/org/aksw/jena_sparql_api/mapper/annotation/HashId.java b/jena-sparql-api-mapper-annotations/src/main/java/org/aksw/jena_sparql_api/mapper/annotation/HashId.java index 999419f51..1a2d06c85 100644 --- a/jena-sparql-api-mapper-annotations/src/main/java/org/aksw/jena_sparql_api/mapper/annotation/HashId.java +++ b/jena-sparql-api-mapper-annotations/src/main/java/org/aksw/jena_sparql_api/mapper/annotation/HashId.java @@ -47,7 +47,7 @@ * * * - * Using {@code @HashId} on class level allows giving post-processing all obtained hashes + * Using {@code @HashId} on class level allows post-processing all obtained hashes * with a hash based on the class. By default it is derived from the class name. 
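The (currently disabled) lookup test above follows a simple pattern that is worth spelling out; the file path and key below are placeholders, and the package of BinarySearcher and BlockSources is assumed to match the classes touched in this patch:

import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;

import org.aksw.jena_sparql_api.io.binseach.BinarySearcher;
import org.aksw.jena_sparql_api.io.binseach.BlockSources;
import org.apache.commons.io.IOUtils;

public class BinSearchLookupSketch {
    public static void main(String[] args) throws Exception {
        // Requires a *sorted* N-Triples file compressed with bzip2
        try (BinarySearcher searcher = BlockSources.createBinarySearcherBz2(
                Paths.get("data/sorted.nt.bz2"))) {
            // The empty prefix matches every line; a subject IRI prefix narrows the result
            try (InputStream in = searcher.search("<http://www.example.org/s1>")) {
                System.out.println(IOUtils.toString(in, StandardCharsets.UTF_8));
            }
        }
    }
}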
 * The following example demonstrates that even if .getId() of A and B yield the same hash,
 * the final hash will be combined with the hash of A and B respectively:
diff --git a/jena-sparql-api-mapper-parent/jena-sparql-api-mapper-core/pom.xml b/jena-sparql-api-mapper-parent/jena-sparql-api-mapper-core/pom.xml
index ea6d811e7..c139c76b9 100644
--- a/jena-sparql-api-mapper-parent/jena-sparql-api-mapper-core/pom.xml
+++ b/jena-sparql-api-mapper-parent/jena-sparql-api-mapper-core/pom.xml
@@ -10,7 +10,7 @@
 org.aksw.jena-sparql-api
 jena-sparql-api-mapper-parent
- 3.16.0-1
+ 3.16.0-2
diff --git a/jena-sparql-api-mapper-parent/pom.xml b/jena-sparql-api-mapper-parent/pom.xml
index 7a258c91a..fecb8fc37 100644
--- a/jena-sparql-api-mapper-parent/pom.xml
+++ b/jena-sparql-api-mapper-parent/pom.xml
@@ -10,7 +10,7 @@
 org.aksw.jena-sparql-api
 jena-sparql-api-parent
- 3.16.0-1
+ 3.16.0-2
diff --git a/jena-sparql-api-mapper-proxy/pom.xml b/jena-sparql-api-mapper-proxy/pom.xml
index 097e4f811..a8f6449b9 100644
--- a/jena-sparql-api-mapper-proxy/pom.xml
+++ b/jena-sparql-api-mapper-proxy/pom.xml
@@ -13,7 +13,7 @@
 org.aksw.jena-sparql-api
 jena-sparql-api-parent
- 3.16.0-1
+ 3.16.0-2
diff --git a/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/ImplementationDelegate.java b/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/ImplementationDelegate.java
new file mode 100644
index 000000000..1702bfe92
--- /dev/null
+++ b/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/ImplementationDelegate.java
@@ -0,0 +1,26 @@
+package org.aksw.jena_sparql_api.mapper.proxy;
+
+import org.apache.jena.enhanced.EnhGraph;
+import org.apache.jena.enhanced.EnhNode;
+import org.apache.jena.enhanced.Implementation;
+import org.apache.jena.graph.Node;
+
+public abstract class ImplementationDelegate
+    extends Implementation
+{
+    protected abstract Implementation getDelegate();
+
+    @Override
+    public EnhNode wrap(Node node, EnhGraph eg) {
+        Implementation delegate = getDelegate();
+        EnhNode result = delegate.wrap(node, eg);
+        return result;
+    }
+
+    @Override
+    public boolean canWrap(Node node, EnhGraph eg) {
+        Implementation delegate = getDelegate();
+        boolean result = delegate.canWrap(node, eg);
+        return result;
+    }
+}
diff --git a/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/ImplementationLazy.java b/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/ImplementationLazy.java
new file mode 100644
index 000000000..0a63603ea
--- /dev/null
+++ b/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/ImplementationLazy.java
@@ -0,0 +1,41 @@
+package org.aksw.jena_sparql_api.mapper.proxy;
+
+import java.util.Objects;
+import java.util.function.Supplier;
+
+import org.apache.jena.enhanced.Implementation;
+
+public class ImplementationLazy
+    extends ImplementationDelegate
+{
+    protected Supplier ctor;
+    protected Class targetClass;
+    protected volatile Implementation delegate;
+
+    /**
+     *
+     * @param ctor The supplier from which the delegate of this class is obtained
+     * @param targetClass The class the implementation is for. Serves an informational purpose only.
+ */ + public ImplementationLazy(Supplier ctor, Class targetClass) { + super(); + this.ctor = ctor; + this.targetClass = targetClass; + this.delegate = null; + } + + @Override + protected Implementation getDelegate() { + if (delegate == null) { + synchronized (this) { + if (delegate == null) { + delegate = Objects.requireNonNull(ctor.get(), + "Lazy request for implementation for " + targetClass + " was answered with null"); + } + } + } + + return delegate; + } + +} diff --git a/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/JenaPluginUtils.java b/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/JenaPluginUtils.java index 0ec8f1eb2..f1b6f9def 100644 --- a/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/JenaPluginUtils.java +++ b/jena-sparql-api-mapper-proxy/src/main/java/org/aksw/jena_sparql_api/mapper/proxy/JenaPluginUtils.java @@ -25,6 +25,7 @@ import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.impl.ResourceImpl; import org.apache.jena.shared.PrefixMapping; +import org.apache.jena.shared.impl.PrefixMappingImpl; import org.apache.jena.sys.JenaSystem; import org.apache.jena.util.ResourceUtils; import org.slf4j.Logger; @@ -40,8 +41,15 @@ public class JenaPluginUtils { } /** - * If you get an exception on typeDecider such as java.lang.NullPointerException - * ensure to call JenaSystem.init() before calling methods on this class + * The type decider can + *
+ * <ul>
+ *   <li>decide for a given RDFNode whether a certain class can act as a view for it</li>
+ *   <li>for a given view write out those triples to an RDFNode such that the type decider
+ * will consider the original view as applicable</li>
+ * </ul>
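To make these two capabilities concrete: a hypothetical resource view and its registration, using the annotations and registration calls exercised by the tests in this patch (the Labeled interface itself is made up):

import org.aksw.jena_sparql_api.mapper.annotation.Iri;
import org.aksw.jena_sparql_api.mapper.annotation.ResourceView;
import org.aksw.jena_sparql_api.mapper.proxy.JenaPluginUtils;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.sys.JenaSystem;

public class ResourceViewDemo {
    @ResourceView
    public interface Labeled extends Resource {
        @Iri("http://www.w3.org/2000/01/rdf-schema#label")
        String getLabel();
        Labeled setLabel(String label);
    }

    public static void main(String[] args) {
        JenaSystem.init(); // avoids the NullPointerException mentioned above
        JenaPluginUtils.registerResourceClasses(Labeled.class);

        Labeled r = ModelFactory.createDefaultModel()
                .createResource("http://www.example.org/thing")
                .as(Labeled.class);
        r.setLabel("thing");
        System.out.println(r.getLabel()); // thing
    }
}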
+ *
+ * If you get an exception on typeDecider such as java.lang.NullPointerException
+ * ensure to call JenaSystem.init() before calling methods on this class
 */
 protected static TypeDeciderImpl typeDecider;
@@ -147,6 +155,7 @@ public static void scan(String basePackage, Personality p) {
 public static void scan(String basePackage, Personality p, PrefixMapping pm) {
     Set classInfos;
+//        System.err.println("Scanning " + basePackage);
     try {
         classInfos = ClassPath.from(Thread.currentThread().getContextClassLoader()).getTopLevelClassesRecursive(basePackage);
     } catch (IOException e) {
@@ -156,11 +165,12 @@ public static void scan(String basePackage, Personality p, PrefixMappin
     for(ClassInfo classInfo : classInfos) {
         Class clazz = classInfo.load();
-        registerResourceClass(clazz, p, pm);
+        registerResourceClass(clazz, p, pm, true);
     }
 }

-    public static void registerResourceClasses(Class ... classes) {
+    @SafeVarargs
+    public static void registerResourceClasses(Class ... classes) {
         registerResourceClasses(Arrays.asList(classes));
     }
@@ -183,13 +193,28 @@ public static void registerResourceClass(Class inter, Class<
     }
 }

+    public static Implementation createImplementation(Class clazz, PrefixMapping pm, boolean lazy) {
+        Implementation result;
+        if (lazy) {
+            // Better clone the prefix mapping as the provided one may have changed
+            // by the time we actually perform the init
+            PrefixMapping clone = new PrefixMappingImpl();
+            clone.setNsPrefixes(pm);
+            result = new ImplementationLazy(() -> createImplementation(clazz, clone), clazz);
+        } else {
+            result = createImplementation(clazz, pm);
+        }
+
+        return result;
+    }
+
     public static Implementation createImplementation(Class clazz, PrefixMapping pm) {
         @SuppressWarnings("unchecked")
         Class cls = (Class)clazz;

         TypeDecider typeDecider = getTypeDecider();

-        logger.debug("Registering " + clazz);
+        logger.debug("Creating implementation for " + clazz);

         BiFunction proxyFactory = MapperProxyUtils.createProxyFactory(cls, pm, typeDecider);
@@ -209,6 +234,11 @@ public static Implementation createImplementation(Class clazz, PrefixMapping
 }

 public static void registerResourceClass(Class clazz, Personality p, PrefixMapping pm) {
+        registerResourceClass(clazz, p, pm, false);
+    }
+
+    public static void registerResourceClass(Class clazz, Personality p, PrefixMapping pm, boolean lazy) {
+
     if(Resource.class.isAssignableFrom(clazz)) {
         boolean supportsProxying = supportsProxying(clazz);
         if(supportsProxying) {
@@ -225,7 +255,7 @@ public static void registerResourceClass(Class clazz, Personality p,
     List> effectiveTypes = new ArrayList<>(Arrays.asList(superTypes));
     //effectiveTypes.add(clazz);

-        Implementation impl = createImplementation(clazz, pm);
+        Implementation impl = createImplementation(clazz, pm, lazy);

     for(Class type : effectiveTypes) {
         if(!type.isAssignableFrom(clazz)) {
@@ -234,6 +264,7 @@ public static void registerResourceClass(Class clazz, Personality p,
             @SuppressWarnings("unchecked")
             Class cls = (Class)type;

+            logger.debug("Registering " + clazz);
             p.add(cls, impl);
         }
     }
diff --git a/jena-sparql-api-mapper-proxy/src/test/java/org/aksw/jena_sparql_api/mapper/proxy/TestMapperProxyUtils.java b/jena-sparql-api-mapper-proxy/src/test/java/org/aksw/jena_sparql_api/mapper/proxy/TestMapperProxyUtils.java
index 1984aa132..6d36fae35 100644
--- a/jena-sparql-api-mapper-proxy/src/test/java/org/aksw/jena_sparql_api/mapper/proxy/TestMapperProxyUtils.java
+++ b/jena-sparql-api-mapper-proxy/src/test/java/org/aksw/jena_sparql_api/mapper/proxy/TestMapperProxyUtils.java
@@ -1,8 +1,10 @@ package
org.aksw.jena_sparql_api.mapper.proxy; import java.util.Arrays; +import java.util.Calendar; import java.util.Collection; import java.util.Collections; +import java.util.GregorianCalendar; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -16,11 +18,21 @@ import org.aksw.jena_sparql_api.mapper.annotation.ResourceView; import org.aksw.jena_sparql_api.rdf.collections.NodeMappers; import org.aksw.jena_sparql_api.rdf.collections.ResourceUtils; +import org.aksw.jena_sparql_api.utils.model.PrefixMapAdapter; +import org.apache.jena.datatypes.xsd.XSDDateTime; import org.apache.jena.ext.com.google.common.collect.Sets; +import org.apache.jena.graph.NodeFactory; +import org.apache.jena.graph.Triple; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.Statement; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.RDFFormat; +import org.apache.jena.riot.system.PrefixMap; +import org.apache.jena.riot.system.PrefixMapExtended; +import org.apache.jena.riot.system.RiotLib; +import org.apache.jena.sparql.ARQConstants; import org.apache.jena.sys.JenaSystem; import org.apache.jena.vocabulary.RDF; import org.apache.jena.vocabulary.RDFS; @@ -57,6 +69,9 @@ public static interface TestResource Map getSimpleMap(); + XSDDateTime getDateTime(); + TestResource setDateTime(XSDDateTime xsdDateTime); + // @Iri("eg:collection") // TestResource setList(List strs); // List getList(); @@ -100,6 +115,10 @@ public static interface TestResourceDefault @Iri("eg:simpleMap") Map getSimpleMap(); + @Iri("eg:dateTime") + XSDDateTime getDateTime(); + TestResourceDefault setDateTime(); + // @Iri("eg:collection") // TestResource setList(List strs); @@ -283,5 +302,32 @@ public void testSimpleMap() { Assert.assertEquals(sb.getSimpleMap().get("value"), 123); } + @Test + public void testDateTime() { + JenaSystem.init(); + JenaPluginUtils.registerResourceClasses(TestResourceDefault.class); + TestResource sb = ModelFactory.createDefaultModel().createResource().as(TestResource.class); + + Calendar actual = new GregorianCalendar(); + sb.setDateTime(new XSDDateTime(actual)); + + +// RDFDataMgr.write(System.out, sb.getModel(), RDFFormat.TURTLE_BLOCKS); + XSDDateTime tmp; + Assert.assertNotNull(tmp = sb.getDateTime()); + + sb.setDateTime(null); + Assert.assertNull(tmp = sb.getDateTime()); + + sb.getModel().getGraph().add(new Triple( + sb.asNode(), + NodeFactory.createURI("http://www.example.org/dateTime"), + RiotLib.parse("\"2020-10-07T13:03:58.471+00:00\"^^"))); + + Assert.assertNotNull(tmp = sb.getDateTime()); + + //Calendar expected = tmp.asCalendar(); + + } } diff --git a/jena-sparql-api-path-finding/pom.xml b/jena-sparql-api-path-finding/pom.xml index cff7520a5..64f7f467c 100644 --- a/jena-sparql-api-path-finding/pom.xml +++ b/jena-sparql-api-path-finding/pom.xml @@ -13,7 +13,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 @@ -27,6 +27,12 @@ jena-jgrapht-bindings + + + org.apache.jena + jena-arq + + org.aksw.jena-sparql-api jena-sparql-api-core diff --git a/jena-sparql-api-path-finding/src/main/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/ConceptPathFinderBidirectionalUtils.java b/jena-sparql-api-path-finding/src/main/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/ConceptPathFinderBidirectionalUtils.java index e7d134147..b7cc69d71 100644 --- 
a/jena-sparql-api-path-finding/src/main/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/ConceptPathFinderBidirectionalUtils.java +++ b/jena-sparql-api-path-finding/src/main/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/ConceptPathFinderBidirectionalUtils.java @@ -122,10 +122,10 @@ public static UnaryRelation createUnboundAwareTypeQuery(UnaryRelation concept) { UnaryRelation result; if(concept.isSubjectConcept()) { - result = Concept.parse("?t | ?s a ?t"); + result = Concept.parse("?t { ?s a ?t }"); } else { - Concept fragment = Concept.parse("?t | OPTIONAL { ?s a ?tmp } BIND(IF(BOUND(?tmp), ?tmp, eg:unbound) AS ?t)", PrefixMapping.Extended); + Concept fragment = Concept.parse("?t { OPTIONAL { ?s a ?tmp } BIND(IF(BOUND(?tmp), ?tmp, eg:unbound) AS ?t) }", PrefixMapping.Extended); result = fragment .prependOn(Vars.s) .with(concept) diff --git a/jena-sparql-api-path-finding/src/main/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/ConceptPathFinderSystem3.java b/jena-sparql-api-path-finding/src/main/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/ConceptPathFinderSystem3.java index c7825b28c..44dd668d3 100644 --- a/jena-sparql-api-path-finding/src/main/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/ConceptPathFinderSystem3.java +++ b/jena-sparql-api-path-finding/src/main/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/ConceptPathFinderSystem3.java @@ -9,7 +9,7 @@ import org.aksw.jena_sparql_api.concepts.Concept; import org.aksw.jena_sparql_api.concepts.ConceptUtils; import org.aksw.jena_sparql_api.concepts.UnaryRelation; -import org.aksw.jena_sparql_api.core.RDFConnectionFactoryEx; +import org.aksw.jena_sparql_api.core.connection.RDFConnectionFactoryEx; import org.aksw.jena_sparql_api.sparql_path.api.ConceptPathFinder; import org.aksw.jena_sparql_api.sparql_path.api.ConceptPathFinderBase; import org.aksw.jena_sparql_api.sparql_path.api.ConceptPathFinderFactorySummaryBase; diff --git a/jena-sparql-api-path-finding/src/test/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/TestConceptPathFinder.java b/jena-sparql-api-path-finding/src/test/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/TestConceptPathFinder.java index eba5bc746..2e535c7e4 100644 --- a/jena-sparql-api-path-finding/src/test/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/TestConceptPathFinder.java +++ b/jena-sparql-api-path-finding/src/test/java/org/aksw/jena_sparql_api/sparql_path/impl/bidirectional/TestConceptPathFinder.java @@ -25,60 +25,60 @@ public class TestConceptPathFinder { - - private static final Logger logger = LoggerFactory.getLogger(TestConceptPathFinder.class); - - - @Test - public void testConceptPathFinder() throws IOException, ParseException { - - // Load some test data and create a sparql connection to it - Dataset ds = RDFDataMgr.loadDataset("concept-path-finder-test-data.ttl"); - RDFConnection dataConnection = RDFConnectionFactory.connect(ds); - - //dataConnection.update("DELETE WHERE { ?s a ?t }"); - - // Set up a path finding system - ConceptPathFinderSystem system = new ConceptPathFinderSystemBidirectional(); - - // Use the system to compute a data summary - // Note, that the summary could be loaded from any place, such as a file used for caching - Model dataSummary = system.computeDataSummary(dataConnection).blockingGet(); - - RDFDataMgr.write(System.out, dataSummary, RDFFormat.TURTLE_PRETTY); - - // Build a path finder; for this, first obtain a factory from the system - // set its 
attributes and eventually build the path finder. - ConceptPathFinder pathFinder = system.newPathFinderBuilder() - .setDataSummary(dataSummary) - .setDataConnection(dataConnection) - .setShortestPathsOnly(false) - .build(); - - - //Concept.parse("?s | ?s ?p [ a eg:D ]", PrefixMapping.Extended), - - // Create search for paths between two given sparql concepts - PathSearch pathSearch = pathFinder.createSearch( - Concept.parse("?s | ?s eg:cd ?o", PrefixMapping.Extended), - Concept.parse("?s | ?s eg:ab ?o", PrefixMapping.Extended)); - //Concept.parse("?s | ?s a eg:A", PrefixMapping.Extended)); - - // Set parameters on the search, such as max path length and the max number of results - // Invocation of .exec() executes the search and yields the flow of results - List actual = pathSearch - .setMaxPathLength(3) - //.setMaxResults(100) - .exec() - .toList().blockingGet(); + + private static final Logger logger = LoggerFactory.getLogger(TestConceptPathFinder.class); + + + @Test + public void testConceptPathFinder() throws IOException, ParseException { + + // Load some test data and create a sparql connection to it + Dataset ds = RDFDataMgr.loadDataset("concept-path-finder-test-data.ttl"); + RDFConnection dataConnection = RDFConnectionFactory.connect(ds); + + //dataConnection.update("DELETE WHERE { ?s a ?t }"); + + // Set up a path finding system + ConceptPathFinderSystem system = new ConceptPathFinderSystemBidirectional(); + + // Use the system to compute a data summary + // Note, that the summary could be loaded from any place, such as a file used for caching + Model dataSummary = system.computeDataSummary(dataConnection).blockingGet(); + + RDFDataMgr.write(System.out, dataSummary, RDFFormat.TURTLE_PRETTY); + + // Build a path finder; for this, first obtain a factory from the system + // set its attributes and eventually build the path finder. 
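Note that this patch also migrates concept strings from the older "?var | pattern" form to "?var { pattern }", as the Concept.parse calls below show. A small sketch of both parse variants (the rdfs:label pattern is just an example):

import org.aksw.jena_sparql_api.concepts.Concept;
import org.apache.jena.shared.PrefixMapping;

public class ConceptParseSketch {
    public static void main(String[] args) {
        // Projection variable followed by a group graph pattern
        Concept types = Concept.parse("?t { ?s a ?t }");

        // Same, with Jena's extended prefix mapping so rdfs: resolves
        Concept labeled = Concept.parse("?s { ?s rdfs:label ?l }", PrefixMapping.Extended);

        System.out.println(types);
        System.out.println(labeled);
    }
}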
+ ConceptPathFinder pathFinder = system.newPathFinderBuilder() + .setDataSummary(dataSummary) + .setDataConnection(dataConnection) + .setShortestPathsOnly(false) + .build(); + + + //Concept.parse("?s | ?s ?p [ a eg:D ]", PrefixMapping.Extended), + + // Create search for paths between two given sparql concepts + PathSearch pathSearch = pathFinder.createSearch( + Concept.parse("?s { ?s eg:cd ?o }", PrefixMapping.Extended), + Concept.parse("?s { ?s eg:ab ?o }", PrefixMapping.Extended)); + //Concept.parse("?s | ?s a eg:A", PrefixMapping.Extended)); + + // Set parameters on the search, such as max path length and the max number of results + // Invocation of .exec() executes the search and yields the flow of results + List actual = pathSearch + .setMaxPathLength(3) + //.setMaxResults(100) + .exec() + .toList().blockingGet(); // System.out.println("Paths"); // actual.forEach(System.out::println); - // TODO Simply specification of reference paths such as by adding a Path.parse method - List expected = Arrays.asList( - SimplePath.fromPropertyPath(PathParser.parse("^eg:bc/^eg:ab", PrefixMapping.Extended))); + // TODO Simply specification of reference paths such as by adding a Path.parse method + List expected = Arrays.asList( + SimplePath.fromPropertyPath(PathParser.parse("^eg:bc/^eg:ab", PrefixMapping.Extended))); - Assert.assertEquals(expected, actual); - } + Assert.assertEquals(expected, actual); + } } diff --git a/jena-sparql-api-playground/pom.xml b/jena-sparql-api-playground/pom.xml index 48ffb1fba..d3e1f41ff 100644 --- a/jena-sparql-api-playground/pom.xml +++ b/jena-sparql-api-playground/pom.xml @@ -12,7 +12,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-playground/src/main/java/org/aksw/jena_sparql_api/playground/fuseki/MainDemoQueryFluent.java b/jena-sparql-api-playground/src/main/java/org/aksw/jena_sparql_api/playground/fuseki/MainDemoQueryFluent.java index 6a912bfcb..40bcd543b 100644 --- a/jena-sparql-api-playground/src/main/java/org/aksw/jena_sparql_api/playground/fuseki/MainDemoQueryFluent.java +++ b/jena-sparql-api-playground/src/main/java/org/aksw/jena_sparql_api/playground/fuseki/MainDemoQueryFluent.java @@ -12,16 +12,13 @@ public class MainDemoQueryFluent { public static void main(String[] args) { -// RDFConnection conn = RDFConnectionRemote.create() -// .destination("https://databus.dbpedia.org/repo/sparql") -// .build(); SparqlService ss = FluentSparqlService .http("https://databus.dbpedia.org/repo/sparql") .config() .configQuery() .withDelay(1, TimeUnit.SECONDS) - .withCache(new CacheBackendFile(Paths.get("/tmp/cache"), 600000l, true, false, true)) + // .withCache(new CacheBackendFile(Paths.get("/tmp/cache"), 600000l, true, false, true)) .withPagination(100) .withDefaultLimit(10, true) .end() @@ -29,8 +26,6 @@ public static void main(String[] args) { .create(); try(RDFConnection baseConn = ss.getRDFConnection()) { -// Model m = ModelFactory.createModelForGraph(new GraphFromSparqlQueryConnection(baseConn)); -// try(RDFConnection appConn = RDFConnectionFactory.connect(DatasetFactory.wrap(m))) { try(RDFConnection appConn = baseConn) { String queryStr = "SELECT * { ?s a ; }"; SparqlRx.execSelect(appConn, queryStr) diff --git a/jena-sparql-api-playground/src/test/java/org/aksw/jena_sparql_api/playground/LoticoExamples.java b/jena-sparql-api-playground/src/test/java/org/aksw/jena_sparql_api/playground/LoticoExamples.java index e000d782e..123b8db35 100644 --- 
a/jena-sparql-api-playground/src/test/java/org/aksw/jena_sparql_api/playground/LoticoExamples.java +++ b/jena-sparql-api-playground/src/test/java/org/aksw/jena_sparql_api/playground/LoticoExamples.java @@ -2,14 +2,25 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.file.Paths; import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.concurrent.TimeUnit; import org.aksw.jena_sparql_api.algebra.expr.transform.ExprTransformVirtualBnodeUris; -import org.aksw.jena_sparql_api.core.RDFConnectionFactoryEx; +import org.aksw.jena_sparql_api.algebra.utils.VirtualPartitionedQuery; +import org.aksw.jena_sparql_api.cache.file.CacheBackendFile; +import org.aksw.jena_sparql_api.concepts.Concept; +import org.aksw.jena_sparql_api.concepts.TernaryRelation; +import org.aksw.jena_sparql_api.concepts.TernaryRelationImpl; +import org.aksw.jena_sparql_api.core.SparqlService; +import org.aksw.jena_sparql_api.core.connection.RDFConnectionFactoryEx; import org.aksw.jena_sparql_api.rx.SparqlRx; import org.aksw.jena_sparql_api.stmt.SparqlStmtMgr; +import org.aksw.jena_sparql_api.update.FluentSparqlService; +import org.aksw.jena_sparql_api.utils.Vars; import org.apache.jena.query.DatasetFactory; import org.apache.jena.query.Query; import org.apache.jena.query.QueryExecution; @@ -20,11 +31,43 @@ import org.apache.jena.rdfconnection.RDFConnection; import org.apache.jena.rdfconnection.RDFConnectionFactory; import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.shared.PrefixMapping; import org.apache.jena.sparql.lang.arq.ParseException; import org.junit.Test; public class LoticoExamples { + /* + * Pimp my query execution + * + */ + @Test + public void testEnhancedQueryExecution() { + + SparqlService ss = FluentSparqlService + .http("https://databus.dbpedia.org/repo/sparql") + .config() + .configQuery() + .withDelay(1, TimeUnit.SECONDS) + // .withCache(new CacheBackendMem()) + .withCache(new CacheBackendFile(Paths.get("/tmp/cache"), 600000l, true, false, true)) + .withPagination(100) + .withDefaultLimit(10, true) + .end() + .end() + .create(); + + try(RDFConnection baseConn = ss.getRDFConnection()) { + try(RDFConnection appConn = baseConn) { + String queryStr = "SELECT * { ?s a ;" + + " }"; + SparqlRx.execSelect(appConn, queryStr) + .forEach(qs -> System.out.println(qs)); + } + } + + } + /* * SPARQL Extensions */ @@ -70,8 +113,6 @@ public void testBnodeRaw() { } public static RDFConnection wrapWithVirtualBnodeUris(RDFConnection conn, String profile) { - //ExprTransformVirtualBnodeUris xform = new ExprTransformVirtualBnodeUris(vendorLabel, bnodeLabelFn); - Model model = RDFDataMgr.loadModel("bnode-rewrites.ttl"); SparqlStmtMgr.execSparql(model, "udf-inferences.sparql"); @@ -110,4 +151,46 @@ public void testBnodeSkolemized() { } } + + /* + * Query over Views - Wikidata + * + * Given: + * + * wd:P400 a wikibase:Property + * rdfs:label "platform" ; + * wikibase:claim p:400 . + * + * p:400 a ObjectProperty . + * + * Goal: + * p:400 a ObjectProperty ; + * rdfs:label "platform" . + * + * + * SELECT * { ?s ?p ?o . 
FILTER(?x = ?x ; ?p ?o FILTER(?x = ) } + */ + @Test + public void testQueryOverViews() { + List views = Arrays.asList( + new TernaryRelationImpl(Concept.parseElement("{ ?s ?p ?o FILTER(?p = rdf:type && ?o = owl:ObjectProperty) }", PrefixMapping.Extended), Vars.s, Vars.p, Vars.o), + new TernaryRelationImpl(Concept.parseElement( + "{ ?c ?p ; ?x ?y }", null), Vars.p, Vars.x, Vars.y) + ); + + String queryStr = "SELECT ?s ?o { ?s a ; ?o . FILTER(?s = )}"; + + try(RDFConnection rawConn = RDFConnectionFactory.connect("https://query.wikidata.org/sparql")) { + RDFConnection conn = RDFConnectionFactoryEx.wrapWithQueryTransform(rawConn, query -> { + Query rewritten = VirtualPartitionedQuery.rewrite(views, query); + System.out.println(rewritten); + return rewritten; + }); + + try(QueryExecution qe = conn.query(queryStr)) { + System.out.println(ResultSetFormatter.asText(qe.execSelect())); + } + } + } } diff --git a/jena-sparql-api-query-containment/pom.xml b/jena-sparql-api-query-containment/pom.xml index 943dbb1b7..23345b5a9 100644 --- a/jena-sparql-api-query-containment/pom.xml +++ b/jena-sparql-api-query-containment/pom.xml @@ -10,7 +10,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-rdf-stream/pom.xml b/jena-sparql-api-rdf-stream/pom.xml index 13ac2c411..f19810758 100644 --- a/jena-sparql-api-rdf-stream/pom.xml +++ b/jena-sparql-api-rdf-stream/pom.xml @@ -8,7 +8,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-rdf-view/pom.xml b/jena-sparql-api-rdf-view/pom.xml index 41a1a873e..ead61160a 100644 --- a/jena-sparql-api-rdf-view/pom.xml +++ b/jena-sparql-api-rdf-view/pom.xml @@ -8,7 +8,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-resource-shape/pom.xml b/jena-sparql-api-resource-shape/pom.xml index 7b769f91d..b3fb92826 100644 --- a/jena-sparql-api-resource-shape/pom.xml +++ b/jena-sparql-api-resource-shape/pom.xml @@ -13,7 +13,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-resources-sparqlqc/pom.xml b/jena-sparql-api-resources-sparqlqc/pom.xml index 934aca096..892abfb43 100644 --- a/jena-sparql-api-resources-sparqlqc/pom.xml +++ b/jena-sparql-api-resources-sparqlqc/pom.xml @@ -8,7 +8,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-resources-test-config/pom.xml b/jena-sparql-api-resources-test-config/pom.xml index 651fa07d2..27a2f4de1 100644 --- a/jena-sparql-api-resources-test-config/pom.xml +++ b/jena-sparql-api-resources-test-config/pom.xml @@ -9,7 +9,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-resources/pom.xml b/jena-sparql-api-resources/pom.xml index fecfc54e8..8360d01e5 100644 --- a/jena-sparql-api-resources/pom.xml +++ b/jena-sparql-api-resources/pom.xml @@ -12,7 +12,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-resources/src/main/resources/xsd-ontology.ttl b/jena-sparql-api-resources/src/main/resources/xsd-ontology.ttl new file mode 100644 index 000000000..3c434b6ff --- /dev/null +++ b/jena-sparql-api-resources/src/main/resources/xsd-ontology.ttl @@ -0,0 +1,47 @@ +PREFIX xsd: +PREFIX rdf: + +xsd:negativeInteger rdfs:subClassOf xsd:nonPositiveInteger . +xsd:nonPositiveInteger rdfs:subClassOfxsd:integer . + +xsd:byte rdfs:subClassOf xsd:short . +xsd:short rdfs:subClassOf xsd:int . 
+xsd:int rdfs:subClassOfxsd:long . +xsd:long rdfs:subClassOf xsd:integer . + +xsd:unsignedByte rdfs:subClassOf xsd:unsignedShort . +xsd:unsignedShort rdfs:subClassOf xsd:unsignedInt . +xsd:unsignedInt rdfs:subClassOf xsd:unsignedLong . +xsd:unsignedLong rdfs:subClassOf xsd:nonNegativeInteger . + +xsd:positiveInteger rdfs:subClassOf xsd:nonNegativeInteger . + +xsd:nonNegativeInteger rdfs:subClassOf xsd:integer . +xsd:integer rdfs:subClassOf xsd:decimal . + +xsd:string rdfs:subClassOf xsd:anySimpleType . +xsd:boolean rdfs:subClassOf xsd:anySimpleType . +xsd:base64Binary rdfs:subClassOf xsd:anySimpleType . +xsd:hexBinary rdfs:subClassOf xsd:anySimpleType . +xsd:float rdfs:subClassOf xsd:anySimpleType . +xsd:decimal rdfs:subClassOf xsd:anySimpleType . +xsd:double rdfs:subClassOf xsd:anySimpleType . +xsd:anyURI rdfs:subClassOf xsd:anySimpleType . +xsd:QName rdfs:subClassOf xsd:anySimpleType . +xsd:NOTATION rdfs:subClassOf xsd:anySimpleType . + +xsd:duration rdfs:subClassOf xsd:anySimpleType . +xsd:dateTime rdfs:subClassOf xsd:anySimpleType . +xsd:time rdfs:subClassOf xsd:anySimpleType . +xsd:date rdfs:subClassOf xsd:anySimpleType . +xsd:gYearMonth rdfs:subClassOf xsd:anySimpleType . +xsd:gYear rdfs:subClassOf xsd:anySimpleType . +xsd:gMonthDay rdfs:subClassOf xsd:anySimpleType . +xsd:gDay rdfs:subClassOf xsd:anySimpleType . +xsd:gMonth rdfs:subClassOf xsd:anySimpleType . + + +#xsd:double http://ld.sparqlify.org/resource/type/numeric +#xsd:float http://ld.sparqlify.org/resource/type/numeric +#xsd:int http://ld.sparqlify.org/resource/type/numeric + diff --git a/jena-sparql-api-rx/pom.xml b/jena-sparql-api-rx/pom.xml index cfccaa18e..a27ea3623 100644 --- a/jena-sparql-api-rx/pom.xml +++ b/jena-sparql-api-rx/pom.xml @@ -13,17 +13,15 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 org.apache.jena jena-dboe-storage - 3.15.0 - org.aksw.jena-sparql-api jena-sparql-api-concepts diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/commons/io/StdIo.java b/jena-sparql-api-rx/src/main/java/org/aksw/commons/io/StdIo.java new file mode 100644 index 000000000..6098b1142 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/commons/io/StdIo.java @@ -0,0 +1,29 @@ +package org.aksw.commons.io; + +import java.io.FileDescriptor; +import java.io.FileOutputStream; +import java.io.OutputStream; + +import org.apache.commons.io.output.CloseShieldOutputStream; + +/** + * A small util class to open close-shielded output streams to stdout and stderr + * + * @author raven + * + */ +public class StdIo { + + public static OutputStream openStdout() { + return new CloseShieldOutputStream(new FileOutputStream(FileDescriptor.out)); + } + + public static OutputStream openStderr() { + return new CloseShieldOutputStream(new FileOutputStream(FileDescriptor.err)); + } + +// public static OutputStream openStdin() { +// return new CloseShieldInputStream(new FileInputStream(FileDescriptor.in)); +// } + +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/QuadTableFromNestedMaps.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/QuadTableFromNestedMaps.java index 07bcb0889..ca0ed978d 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/QuadTableFromNestedMaps.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/QuadTableFromNestedMaps.java @@ -5,7 +5,10 @@ import org.apache.jena.graph.Node; import org.apache.jena.query.ReadWrite; +import org.apache.jena.query.TxnType; import org.apache.jena.sparql.core.Quad; +import 
org.apache.jena.sparql.core.Transactional; +import org.apache.jena.sparql.core.Transactional.Promote; import org.apache.jena.sparql.core.mem.QuadTable; @@ -18,7 +21,7 @@ * */ public class QuadTableFromNestedMaps - implements QuadTable + implements QuadTable, Transactional { public static class TxnState { ReadWrite mode; @@ -110,4 +113,36 @@ public Stream listGraphNodes() { .filter(node -> !Quad.isDefaultGraph(node)); } + @Override + public void begin(TxnType type) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean promote(Promote mode) { + throw new UnsupportedOperationException(); + } + + @Override + public ReadWrite transactionMode() { + throw new UnsupportedOperationException(); + } + + @Override + public TxnType transactionType() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isInTransaction() { + TxnState txnState = local().get(); + boolean result = txnState != null; + return result; + } + + @Override + public void abort() { + throw new UnsupportedOperationException(); + } + } diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/TripleTableCore.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/TripleTableCore.java index 90af31fdd..fed6be96c 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/TripleTableCore.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/TripleTableCore.java @@ -11,6 +11,11 @@ public interface TripleTableCore { void delete(Triple triple); Stream find(Node s, Node p, Node o); + default Stream find() { + return find(Node.ANY, Node.ANY, Node.ANY); + } + + default boolean isEmpty() { return !find(Node.ANY, Node.ANY, Node.ANY) .findAny().isPresent(); diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/TripleTableCoreFromNestedMapsImpl.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/TripleTableCoreFromNestedMapsImpl.java index 58c8e4957..4b11c010a 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/TripleTableCoreFromNestedMapsImpl.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/dboe/TripleTableCoreFromNestedMapsImpl.java @@ -6,12 +6,10 @@ import java.util.stream.Stream; import org.aksw.jena_sparql_api.dboe.QuadTableCoreFromNestedMapsImpl.MapSupplier; -import org.apache.jena.ext.com.google.common.collect.ForwardingMap; import org.apache.jena.graph.Node; import org.apache.jena.graph.Triple; public class TripleTableCoreFromNestedMapsImpl - extends ForwardingMap>> implements TripleTableCore { protected Map>> store; @@ -29,11 +27,6 @@ public TripleTableCoreFromNestedMapsImpl(MapSupplier mapSupplier) { store = mapSupplier.newMap(); } - @Override - protected Map>> delegate() { - return store; - } - @Override public void clear() { store.clear(); @@ -124,6 +117,8 @@ public static void delete( } } + + // public static Map>>> copy( // Stream stream, // MapSupplier mapSupplier diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/AggCollection.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/AggCollection.java new file mode 100644 index 000000000..813993846 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/AggCollection.java @@ -0,0 +1,56 @@ +package org.aksw.jena_sparql_api.rx; + +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.function.Supplier; + +import org.aksw.jena_sparql_api.mapper.Accumulator; +import 
org.aksw.jena_sparql_api.mapper.Aggregator; + +public class AggCollection + implements Aggregator +{ + + protected Supplier collectionSupplier; + protected Function bindingToItem; + protected BiConsumer addToCollection; + + public AggCollection( + Supplier collector, + Function bindingToItem, + BiConsumer addToCollection + ) { + super(); + this.collectionSupplier = collector; + this.bindingToItem = bindingToItem; + this.addToCollection = addToCollection; + } + + @Override + public Accumulator createAccumulator() { + COLLECTION collection = collectionSupplier.get(); + return new AccCollection(collection); + } + + public class AccCollection + implements Accumulator + { + protected COLLECTION collection; + + public AccCollection(COLLECTION collection) { + super(); + this.collection = collection; + } + + @Override + public void accumulate(T binding) { + ITEM item = bindingToItem.apply(binding); + addToCollection.accept(collection, item); + } + + @Override + public COLLECTION getValue() { + return collection; + } + } +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/AggObjectGraph.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/AggObjectGraph.java new file mode 100644 index 000000000..30537ef9e --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/AggObjectGraph.java @@ -0,0 +1,132 @@ +package org.aksw.jena_sparql_api.rx; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Supplier; + +import org.aksw.jena_sparql_api.mapper.Accumulator; +import org.aksw.jena_sparql_api.mapper.Aggregator; +import org.apache.jena.ext.com.google.common.collect.HashMultimap; +import org.apache.jena.ext.com.google.common.collect.SetMultimap; +import org.apache.jena.graph.Graph; +import org.apache.jena.graph.Node; +import org.apache.jena.graph.Triple; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.engine.binding.Binding; +import org.apache.jena.sparql.modify.TemplateLib; +import org.apache.jena.sparql.syntax.Template; + +/** + * An aggregator whose accumulators create RDF graphs (triples) from the bindings passed to them. + * At its core this is exactly the behavior of a SPARQL construct query; however, this class supports an extension: + * + * Blank nodes of the template can be remapped via a custom function based on the binding. + * Template blank nodes that are not remapped by the function will be mapped to fresh blank nodes + * on every binding. 
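+ *
+ * A minimal usage sketch (hypothetical: {@code template}, the template blank
+ * node {@code bnode}, the {@code bindings} collection and the id function are
+ * illustrative assumptions, not part of this class):
+ * <pre>{@code
+ * // Remap the template bnode to a URI derived from the ?s binding;
+ * // all other template bnodes get fresh blank nodes per binding
+ * Map<Node, Function<Binding, Node>> idGens = Collections.singletonMap(bnode,
+ *     b -> NodeFactory.createURI(b.get(Var.alloc("s")).getURI() + "-entity"));
+ * AggObjectGraph agg = new AggObjectGraph(template,
+ *     Collections.singleton(bnode), GraphFactory::createDefaultGraph, idGens);
+ * AggObjectGraph.AccObjectGraph acc = agg.createAccumulator();
+ * bindings.forEach(acc::accumulate);
+ * Graph graph = acc.getValue();
+ * }</pre>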
+ * + * + * @author raven + * + */ +public class AggObjectGraph + implements Aggregator +// implements Acc { +{ + protected Template template; + /** + * Mapping of a subset of the bnodes in the template to + * functions that generate a local id from a binding passed to this accumulator + */ + protected Map> nodeIdGenMap; + + protected Supplier graphSupplier; + + // Accumulator will keep track of the remapping of blank nodes in this set + protected Set trackedTemplateNodes; + + public AggObjectGraph( + Template template, + Set trackedTemplateNodes, + Supplier graphSupplier, + Map> nodeIdGenMap) { + super(); + this.template = template; + this.trackedTemplateNodes = trackedTemplateNodes; + this.graphSupplier = graphSupplier; + this.nodeIdGenMap = nodeIdGenMap; + } + + @Override + public AccObjectGraph createAccumulator() { + Graph graph = graphSupplier.get(); + return new AccObjectGraph(graph); + } + + + public class AccObjectGraph + implements Accumulator + { + protected Graph graph; + protected SetMultimap templateNodeToInsts; + + public AccObjectGraph(Graph graph) { + super(); + this.graph = graph; + this.templateNodeToInsts = HashMultimap.create(); + } + + public void accumulate(Binding binding) { + Map bnodeMap = new HashMap<>(); + + for (Entry> nodeIdGen : nodeIdGenMap.entrySet()) { + Node templateNode = nodeIdGen.getKey(); + Function idGen = nodeIdGen.getValue(); + + Node id = idGen.apply(binding); + bnodeMap.put(templateNode, id); + } + + for(Triple t : template.getTriples()) { + Triple newT = TemplateLib.subst(t, binding, bnodeMap); + if(newT.isConcrete()) { + graph.add(newT); + } + } + + for (Node templateNode : trackedTemplateNodes) { + Node remapped; + if (templateNode.isVariable()) { + Var var = (Var)templateNode; + remapped = binding.get(var); + } else { + remapped = bnodeMap.get(templateNode); + } + + if (remapped != null) { + templateNodeToInsts.put(templateNode, remapped); + } + } + } + + /** + * Return for a given node of the template all the instances + * (either from bindings or from bnode mapping) + * that were generated in this accumulator + * + * @param templateNode + * @return + */ + public Set getTrackedNodes(Node templateNode) { + Set result = templateNodeToInsts.get(templateNode); + return result; + } + + // @Override + public Graph getValue() { + return graph; + } + } +} \ No newline at end of file diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/DatasetFactoryEx.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/DatasetFactoryEx.java index c455b562c..bbee543d6 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/DatasetFactoryEx.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/DatasetFactoryEx.java @@ -3,11 +3,7 @@ import org.apache.jena.query.Dataset; import org.apache.jena.query.DatasetFactory; import org.apache.jena.sparql.core.DatasetGraph; -import org.apache.jena.sparql.core.DatasetGraphFactory; import org.apache.jena.sparql.core.Quad; -import org.apache.jena.sparql.core.mem.DatasetGraphInMemory; -import org.apache.jena.sparql.core.mem.QuadTable; -import org.apache.jena.sparql.core.mem.TripleTable; public class DatasetFactoryEx { diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/DatasetGraphQuadsImpl.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/DatasetGraphQuadsImpl.java index 05c551bae..ee6e3bcc7 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/DatasetGraphQuadsImpl.java +++ 
b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/DatasetGraphQuadsImpl.java @@ -1,7 +1,10 @@ package org.aksw.jena_sparql_api.rx; +import static org.apache.jena.system.Txn.calculateRead; + import java.util.Collections; import java.util.Iterator; +import java.util.function.Supplier; import org.aksw.jena_sparql_api.dboe.QuadTableFromNestedMaps; import org.apache.jena.graph.Graph; @@ -11,149 +14,164 @@ import org.apache.jena.sparql.core.DatasetGraphQuads; import org.apache.jena.sparql.core.GraphView; import org.apache.jena.sparql.core.Quad; +import org.apache.jena.sparql.core.Transactional; import org.apache.jena.sparql.core.mem.QuadTable; +import org.apache.jena.system.Txn; public class DatasetGraphQuadsImpl - extends DatasetGraphQuads + extends DatasetGraphQuads { - protected QuadTable table; - - public DatasetGraphQuadsImpl() { - this(new QuadTableFromNestedMaps()); - } - - public DatasetGraphQuadsImpl(QuadTable table) { - super(); - this.table = table; - } - - @Override - public boolean supportsTransactions() { - // FIXME We fake a true value, because otherwise any update causes an exception - return true; - } - - @Override - public void begin(TxnType type) { - } - - @Override - public void begin(ReadWrite readWrite) { - table.begin(readWrite); - } - - @Override - public boolean promote(Promote mode) { - return false; - } - - @Override - public void commit() { - table.commit(); - } - - @Override - public void abort() { - table.abort(); - } - - @Override - public void end() { - table.end(); - } - - @Override - public ReadWrite transactionMode() { - return null; - } - - @Override - public TxnType transactionType() { - return null; - } - - @Override - public boolean isInTransaction() { - return false; - } - - @Override - public Iterator find(Node g, Node s, Node p, Node o) { - Iterator result = table.find(g, s, p, o).iterator(); - return result; - } - - @Override - public Iterator findNG(Node g, Node s, Node p, Node o) { - - Node gm = g == null || Quad.isUnionGraph(g) ? Node.ANY : g; - - Iterator result; - if(Quad.isDefaultGraph(gm)) { - result = Collections.emptyIterator(); + protected QuadTable table; + + public DatasetGraphQuadsImpl() { + this(new QuadTableFromNestedMaps()); + } + + public DatasetGraphQuadsImpl(QuadTable table) { + super(); + this.table = table; + } + + @Override + public boolean supportsTransactions() { + return true; + } + + @Override + public void begin(TxnType type) { + table.begin(TxnType.convert(type)); + } + + @Override + public void begin(ReadWrite readWrite) { + table.begin(readWrite); + } + + @Override + public boolean promote(Promote mode) { + return false; + } + + @Override + public void commit() { + table.commit(); + } + + @Override + public void abort() { + table.abort(); + } + + @Override + public void end() { + table.end(); + } + + @Override + public ReadWrite transactionMode() { + throw new UnsupportedOperationException(); + } + + @Override + public TxnType transactionType() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isInTransaction() { + boolean result = ((Transactional)table).isInTransaction(); + return result; + } + + private void access(final Runnable source) { + if (isInTransaction()) { + source.run(); + } else { + Txn.executeRead(this, source::run); + } + } + + private T access(final Supplier source) { + return isInTransaction() ? 
source.get() : calculateRead(this, source::get); + } + + @Override + public Iterator find(Node g, Node s, Node p, Node o) { + Iterator result = access(() -> table.find(g, s, p, o).iterator()); + return result; + } + + @Override + public Iterator findNG(Node g, Node s, Node p, Node o) { + + Node gm = g == null || Quad.isUnionGraph(g) ? Node.ANY : g; + + Iterator result; + if(Quad.isDefaultGraph(gm)) { + result = Collections.emptyIterator(); // } else if(Quad.isUnionGraph(gm)) { // result = GraphOps.unionGraph(this).find(s, p, o).mapWith(t -> new Quad(Quad.unionGraph, t)); - } else { - result = table.find(gm, s, p, o) - .filter(q -> !Quad.isDefaultGraph(q.getGraph())) - .iterator(); - - } - - return result; - } - - @Override - public void add(Quad quad) { - table.add(quad); - } - - @Override - public void delete(Quad quad) { - table.delete(quad); - } - - @Override - public Graph getDefaultGraph() { - return GraphView.createDefaultGraph(this); - } - - @Override - public Graph getGraph(Node graphNode) { - return GraphView.createNamedGraph(this, graphNode); - } - - @Override - public void addGraph(Node graphName, Graph graph) { - graph.find().forEachRemaining(t -> add(new Quad(graphName, t))); - } - + } else { + result = access(() -> table.find(gm, s, p, o) + .filter(q -> !Quad.isDefaultGraph(q.getGraph())) + .iterator()); + + } + + return result; + } + + @Override + public void add(Quad quad) { + access(() -> table.add(quad)); + } + + @Override + public void delete(Quad quad) { + access(() -> table.delete(quad)); + } + + @Override + public Graph getDefaultGraph() { + return GraphView.createDefaultGraph(this); + } + + @Override + public Graph getGraph(Node graphNode) { + return GraphView.createNamedGraph(this, graphNode); + } + + @Override + public void addGraph(Node graphName, Graph graph) { + graph.find().forEachRemaining(t -> add(new Quad(graphName, t))); + } + @Override public Iterator listGraphNodes() { - return table.listGraphNodes().iterator(); + return access(() -> table.listGraphNodes().iterator()); } - + public static DatasetGraphQuadsImpl create(Iterator it) { - DatasetGraphQuadsImpl result = new DatasetGraphQuadsImpl(); - while(it.hasNext()) { - Quad quad = it.next(); - result.add(quad); - } - return result; + DatasetGraphQuadsImpl result = new DatasetGraphQuadsImpl(); + while(it.hasNext()) { + Quad quad = it.next(); + result.add(quad); + } + return result; } public static DatasetGraphQuadsImpl create(Iterable quads) { - DatasetGraphQuadsImpl result = new DatasetGraphQuadsImpl(); - quads.forEach(result::add); - return result; + DatasetGraphQuadsImpl result = new DatasetGraphQuadsImpl(); + quads.forEach(result::add); + return result; } - + @Override public long size() { - // Comparing with DatasetFactory.create() it seems the count is just - // the number of named graphs (excluding the default graph) - - //return table.listGraphNodes().count(); // - return table.find(Node.ANY, Node.ANY, Node.ANY, Node.ANY).count(); + // Comparing with DatasetFactory.create() it seems the count is just + // the number of named graphs (excluding the default graph) + + //return table.listGraphNodes().count(); // + return table.find(Node.ANY, Node.ANY, Node.ANY, Node.ANY).count(); } } diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/EntityBaseQuery.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/EntityBaseQuery.java new file mode 100644 index 000000000..79f19e329 --- /dev/null +++ 
b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/EntityBaseQuery.java @@ -0,0 +1,124 @@ +package org.aksw.jena_sparql_api.rx; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.aksw.jena_sparql_api.rx.entity.model.EntityTemplate; +import org.aksw.jena_sparql_api.rx.entity.model.EntityTemplateImpl; +import org.apache.jena.graph.Node; +import org.apache.jena.query.Query; +import org.apache.jena.query.SortCondition; +import org.apache.jena.sparql.core.BasicPattern; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.syntax.Template; + +/** + * A query with a sequence of designated partition variables + * + * @author raven + * + */ +public class EntityBaseQuery + implements Cloneable +{ + /** + * The standard query can be a SPARQL SELECT query + * + */ + protected Query standardQuery; + + protected EntityTemplate entityTemplate; + protected List partitionVars; + protected List partitionOrderBy; + + public EntityBaseQuery( + List partitionVars, + EntityTemplate entityTemplate, + Query standardQuery) { + this(partitionVars, entityTemplate, standardQuery, new ArrayList<>()); + } + + public EntityBaseQuery( + List partitionVars, + EntityTemplate entityTemplate, + Query standardQuery, + List partitionOrderBy) { + super(); + this.standardQuery = standardQuery; + this.entityTemplate = entityTemplate; + this.partitionVars = partitionVars; + this.partitionOrderBy = partitionOrderBy; + } + +// @Override + public EntityBaseQuery cloneQuery() { + return new EntityBaseQuery( + new ArrayList<>(partitionVars), + entityTemplate.cloneTemplate(), + standardQuery.cloneQuery(), + new ArrayList<>(partitionOrderBy)); + } + + public Query getStandardQuery() { + return standardQuery; + } + + public EntityTemplate getEntityTemplate() { + return entityTemplate; + } + + public void setEntityTemplate(EntityTemplate entityTemplate) { + this.entityTemplate = entityTemplate; + } + + public void setStandardQuery(Query standardQuery) { + this.standardQuery = standardQuery; + } + + public List getPartitionVars() { + return partitionVars; + } + + public void setPartitionVars(List partitionVars) { + this.partitionVars = partitionVars; + } + + public List getPartitionOrderBy() { + return partitionOrderBy; + } + + @Override + public String toString() { + String result + = "ENTITY " + partitionVars + "\n" + + "CONSTRUCT " + entityTemplate + "\n" + + "WHERE " + standardQuery + "\n" + + "ORDER ENTITIES BY " + partitionOrderBy; + + return result; + } + + public static EntityBaseQuery create(Var partitionAndEntityVar, Query standardQuery) { + Query partitionSelect = standardQuery.cloneQuery(); + partitionSelect.setQuerySelectType(); + partitionSelect.setQueryResultStar(true); + + List partitionVars = Collections.singletonList(partitionAndEntityVar); + List entityNodes = Collections.singletonList(partitionAndEntityVar); + + Template template = standardQuery.getConstructTemplate(); + if (template == null) { + template = new Template(new BasicPattern()); + } + + EntityTemplateImpl et = new EntityTemplateImpl( + entityNodes, + template); + + EntityBaseQuery result = new EntityBaseQuery(partitionVars, et, partitionSelect); + + return result; + } +} + diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/EntityGraphFragment.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/EntityGraphFragment.java new file mode 100644 index 000000000..47cd74d84 --- /dev/null +++ 
b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/EntityGraphFragment.java @@ -0,0 +1,106 @@ +package org.aksw.jena_sparql_api.rx; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.aksw.jena_sparql_api.rx.entity.model.EntityTemplate; +import org.aksw.jena_sparql_api.rx.entity.model.EntityTemplateImpl; +import org.aksw.jena_sparql_api.utils.ElementUtils; +import org.apache.jena.graph.Node; +import org.apache.jena.query.Query; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.graph.NodeTransform; +import org.apache.jena.sparql.graph.NodeTransformLib; +import org.apache.jena.sparql.syntax.Element; +import org.apache.jena.sparql.syntax.ElementGroup; + +/** + * A simplified construct query that consists of just a sequence of + * entity variables, a graph pattern and a template. + * I.e. a mapping of a relation to triples, with designated partition variables. + * + * @author raven + * + */ +public class EntityGraphFragment { + protected List partitionVars; + protected EntityTemplate entityTemplate; + protected Element element; + + public EntityGraphFragment() { + this(new ArrayList<>(), new EntityTemplateImpl(), null); + } + + public EntityGraphFragment(List partitionVars, EntityTemplate entityTemplate, Element element) { + super(); + this.partitionVars = partitionVars; + this.entityTemplate = entityTemplate; + this.element = element; + } + + public static EntityGraphFragment empty(List partitionVars) { + return new EntityGraphFragment(partitionVars, + new EntityTemplateImpl(), + new ElementGroup()); + } + + public static EntityGraphFragment fromQuery(Var entityVar, Query query) { + return fromQuery(Arrays.asList(entityVar), Arrays.asList(entityVar), query); + } + + public static EntityGraphFragment fromQuery(List partitionVars, List entityNodes, Query query) { + EntityGraphFragment result = new EntityGraphFragment( + partitionVars, + new EntityTemplateImpl(entityNodes, query.getConstructTemplate()), + query.getQueryPattern()); + + return result; + } + + public EntityTemplate getEntityTemplate() { + return entityTemplate; + } + + public void setEntityTemplate(EntityTemplate entityTemplate) { + this.entityTemplate = entityTemplate; + } + + public Element getElement() { + return element; + } + + public void setElement(Element element) { + this.element = element; + } + + public List getPartitionVars() { + return partitionVars; + } + + public void setPartitionVars(List entityVars) { + this.partitionVars = entityVars; + } + +// @Override + public EntityGraphFragment applyNodeTransform(NodeTransform nodeTransform) { + EntityGraphFragment result = new EntityGraphFragment( + NodeTransformLib.transformVars(nodeTransform, partitionVars), + entityTemplate.applyNodeTransform(nodeTransform), + ElementUtils.applyNodeTransform(element, nodeTransform)); + + return result; + } + + + @Override + public String toString() { + String result + = "ENTITY " + partitionVars + "\n" + + "CONSTRUCT " + entityTemplate + "\n" + + "WHERE " + element; + + return result; + } + +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/ExprTransformAllocAggregate.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/ExprTransformAllocAggregate.java new file mode 100644 index 000000000..75a2b2969 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/ExprTransformAllocAggregate.java @@ -0,0 +1,26 @@ +package org.aksw.jena_sparql_api.rx; + +import org.apache.jena.query.Query; +import 
org.apache.jena.sparql.expr.Expr; +import org.apache.jena.sparql.expr.ExprAggregator; +import org.apache.jena.sparql.expr.ExprTransformCopy; +import org.apache.jena.sparql.expr.aggregate.Aggregator; + +public class ExprTransformAllocAggregate + extends ExprTransformCopy +{ + protected Query query; + + public ExprTransformAllocAggregate(Query query) { + super(); + this.query = query; + } + + @Override + public Expr transform(ExprAggregator eAgg) { + ExprAggregator newExpr = (ExprAggregator)super.transform(eAgg); + Aggregator agg = newExpr.getAggregator(); + Expr result = query.allocAggregate(agg); + return result; + } +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/FlowableTransformerLocalOrdering.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/FlowableTransformerLocalOrderingOld.java similarity index 93% rename from jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/FlowableTransformerLocalOrdering.java rename to jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/FlowableTransformerLocalOrderingOld.java index 5e95cbf03..5abd7afa8 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/FlowableTransformerLocalOrdering.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/FlowableTransformerLocalOrderingOld.java @@ -20,6 +20,8 @@ import io.reactivex.rxjava3.core.FlowableTransformer; /** + * USE OperatorLocalOrder + * * A subscriber that performs local ordering of the items by their sequence id. * Local ordering means, that ordering is accomplished in a streaming fashion * without the need of a global view of *all* items. @@ -45,11 +47,11 @@ * @param * @param */ -public class FlowableTransformerLocalOrdering +public class FlowableTransformerLocalOrderingOld //implements Subscriber implements Emitter { - private static final Logger logger = LoggerFactory.getLogger(FlowableTransformerLocalOrdering.class); + private static final Logger logger = LoggerFactory.getLogger(FlowableTransformerLocalOrderingOld.class); protected FlowableEmitter delegate; //Consumer delegate; @@ -68,7 +70,7 @@ public class FlowableTransformerLocalOrdering protected NavigableMap seqIdToValue; - public FlowableTransformerLocalOrdering( + public FlowableTransformerLocalOrderingOld( S expectedSeqId, Function incrementSeqId, BiFunction distanceFn, @@ -198,11 +200,11 @@ public void onNext(T value) { public static Emitter forLong(long initiallyExpectedId, Function extractSeqId, FlowableEmitter delegate) { - return new FlowableTransformerLocalOrdering(initiallyExpectedId, id -> Long.valueOf(id.longValue() + 1l), (a, b) -> a - b, extractSeqId, delegate); + return new FlowableTransformerLocalOrderingOld(initiallyExpectedId, id -> Long.valueOf(id.longValue() + 1l), (a, b) -> a - b, extractSeqId, delegate); } - public static > FlowableTransformerLocalOrdering wrap(S initiallyExpectedId, Function incrementSeqId, BiFunction distanceFn, Function extractSeqId, FlowableEmitter delegate) { - return new FlowableTransformerLocalOrdering(initiallyExpectedId, incrementSeqId, distanceFn, extractSeqId, delegate); + public static > FlowableTransformerLocalOrderingOld wrap(S initiallyExpectedId, Function incrementSeqId, BiFunction distanceFn, Function extractSeqId, FlowableEmitter delegate) { + return new FlowableTransformerLocalOrderingOld(initiallyExpectedId, incrementSeqId, distanceFn, extractSeqId, delegate); } public static > FlowableTransformer transformer(S initiallyExpectedId, Function incrementSeqId, BiFunction distanceFn, Function 
extractSeqId) { @@ -212,7 +214,7 @@ public static > FlowableTransformer transformer @Override public void subscribe(FlowableEmitter e) throws Exception { - FlowableTransformerLocalOrdering tmp = wrap( + FlowableTransformerLocalOrderingOld tmp = wrap( initiallyExpectedId, incrementSeqId, distanceFn, diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFDataMgrEx.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFDataMgrEx.java index 8fe018baa..de6ab5658 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFDataMgrEx.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFDataMgrEx.java @@ -16,6 +16,7 @@ import org.apache.jena.ext.com.google.common.collect.ArrayListMultimap; import org.apache.jena.ext.com.google.common.collect.Multimap; import org.apache.jena.ext.com.google.common.collect.Streams; +import org.apache.jena.graph.Triple; import org.apache.jena.query.Dataset; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; @@ -26,6 +27,7 @@ import org.apache.jena.riot.RDFParser; import org.apache.jena.riot.resultset.ResultSetReaderRegistry; import org.apache.jena.shared.PrefixMapping; +import org.apache.jena.sparql.core.Quad; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -217,6 +219,9 @@ public static TypedInputStream open(String src, Iterable probeLangs) { if(useStdIn) { // Use the close shield to prevent closing stdin on .close() // TODO Investigate if this is redundant; RDFDataMgr might already do it + + // FIXME Does not work for encoded streams; for those we would have to go through + // Jena's StreamManager result = probeLang(new BufferedInputStream(System.in), probeLangs); } else { result = Objects.requireNonNull(RDFDataMgr.open(src), "Could not create input stream from " + src); @@ -246,6 +251,19 @@ public static TypedInputStream open(String src, Iterable probeLangs) { } + public static RDFIterator createIteratorTriples(PrefixMapping prefixMapping, InputStream in, Lang lang) { + InputStream combined = prependWithPrefixes(in, prefixMapping); + RDFIterator it = RDFDataMgrRx.createIteratorTriples(combined, lang, null, (thread, throwable) -> {}, thread -> {}); + return it; + } + + + public static RDFIterator createIteratorQuads(PrefixMapping prefixMapping, InputStream in, Lang lang) { + InputStream combined = prependWithPrefixes(in, prefixMapping); + RDFIterator it = RDFDataMgrRx.createIteratorQuads(combined, lang, null, (thread, throwable) -> {}, thread -> {}); + return it; + } + public static Dataset parseTrigAgainstDataset(Dataset dataset, PrefixMapping prefixMapping, InputStream in) { // Add namespaces from the spec // Apparently Jena does not support parsing against diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFDataMgrRx.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFDataMgrRx.java index f18793ce6..16c3e6e25 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFDataMgrRx.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFDataMgrRx.java @@ -205,7 +205,7 @@ public static Flowable createFlowableDatasets(String filenameOrURI, Lan } - public static Iterator createIteratorQuads( + public static RDFIterator createIteratorQuads( InputStream in, Lang lang, String baseIRI, @@ -223,7 +223,7 @@ public static Iterator createIteratorQuads( th); } - public static Iterator createIteratorQuads( + public static RDFIterator createIteratorQuads( TypedInputStream in, 
UncaughtExceptionHandler eh, Consumer th) { @@ -235,7 +235,7 @@ public static Iterator createIteratorQuads( th); } - public static Iterator createIteratorTriples( + public static RDFIterator createIteratorTriples( InputStream in, Lang lang, String baseIRI, @@ -253,7 +253,7 @@ public static Iterator createIteratorTriples( eh); } - public static Iterator createIteratorTriples( + public static RDFIterator createIteratorTriples( TypedInputStream in, UncaughtExceptionHandler eh, Consumer th) { @@ -278,7 +278,7 @@ public static Iterator createIteratorTriples( * @param baseIRI Base IRI * @return Iterator over the quads */ - public static Iterator createIteratorQuads( + public static RDFIterator createIteratorQuads( InputStream input, Lang lang, String baseIRI, @@ -288,12 +288,12 @@ public static Iterator createIteratorQuads( // Special case N-Quads, because the RIOT reader has a pull interface if ( RDFLanguages.sameLang(RDFLanguages.NQUADS, lang) ) { - return new IteratorResourceClosing<>( + return new RDFIteratorFromIterator(new IteratorResourceClosing<>( RiotParsers.createIteratorNQuads(input, null, RDFDataMgrRx.dftProfile()), - input); + input), baseIRI); } // Otherwise, we have to spin up a thread to deal with it - final PipedRDFIterator it = new PipedRDFIterator<>(bufferSize, fair, pollTimeout, maxPolls); + final RDFIteratorFromPipedRDFIterator it = new RDFIteratorFromPipedRDFIterator<>(bufferSize, fair, pollTimeout, maxPolls); // Upgrade triples to quads; this happens if quads are requested from a triple lang final PipedQuadsStream out = new PipedQuadsStream(it) { @@ -337,7 +337,7 @@ public static ParserProfile dftProfile() { * @param baseIRI Base IRI * @return Iterator over the quads */ - public static Iterator createIteratorTriples( + public static RDFIterator createIteratorTriples( InputStream input, Lang lang, String baseIRI, @@ -346,12 +346,12 @@ public static Iterator createIteratorTriples( UncaughtExceptionHandler eh) { // Special case N-Quads, because the RIOT reader has a pull interface if ( RDFLanguages.sameLang(RDFLanguages.NTRIPLES, lang) ) { - return new IteratorResourceClosing<>( + return new RDFIteratorFromIterator(new IteratorResourceClosing<>( RiotParsers.createIteratorNTriples(input, null, RDFDataMgrRx.dftProfile()), - input); + input), baseIRI); } // Otherwise, we have to spin up a thread to deal with it - final PipedRDFIterator it = new PipedRDFIterator<>(bufferSize, fair, pollTimeout, maxPolls); + final RDFIteratorFromPipedRDFIterator it = new RDFIteratorFromPipedRDFIterator<>(bufferSize, fair, pollTimeout, maxPolls); final PipedTriplesStream out = new PipedTriplesStream(it); Thread t = new Thread(()-> { @@ -501,14 +501,15 @@ public static Flowable createFlowableDatasets(Callable inS public static Flowable createFlowableDatasets(Callable inSupplier) { - Flowable result = createFlowableFromInputStream( - inSupplier, - th -> eh -> in -> createIteratorQuads( - in, - RDFLanguages.contentTypeToLang(in.getContentType()), - in.getBaseURI(), - eh, - th)) +// Flowable result = createFlowableFromInputStream( +// inSupplier, +// th -> eh -> in -> createIteratorQuads( +// in, +// RDFLanguages.contentTypeToLang(in.getContentType()), +// in.getBaseURI(), +// eh, +// th)) + Flowable result = createFlowableQuads(inSupplier) .compose(DatasetGraphOpsRx.datasetsFromConsecutiveQuads( Quad::getGraph, DatasetGraphFactoryEx::createInsertOrderPreservingDatasetGraph)) @@ -773,7 +774,8 @@ public static void writeResources(Flowable flowable, OutputS public static class QuadEncoderDistinguish { 
protected Set priorGraphs = Collections.emptySet(); - public synchronized Dataset encode(Dataset dataset) { + // Do we need synchronized? Processing should happen in order anyway! + public Dataset encode(Dataset dataset) { Set now = Sets.newHashSet(dataset.asDatasetGraph().listGraphNodes()); List quads = Lists.newArrayList(dataset.asDatasetGraph().find()); @@ -950,7 +952,7 @@ public static > FlowableTransformer upstream - .flatMapMaybe(batch -> { + .concatMapMaybe(batch -> { for(Dataset item : batch) { Dataset encoded = encoder.encode(item); @@ -985,7 +987,7 @@ public static > FlowableTransformer cre } return upstream - .flatMapMaybe(batch -> { + .concatMapMaybe(batch -> { RDFDataMgr.writeQuads(out, batch.iterator()); out.flush(); return Maybe.empty(); diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIterator.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIterator.java new file mode 100644 index 000000000..79bd98ca3 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIterator.java @@ -0,0 +1,28 @@ +package org.aksw.jena_sparql_api.rx; + + +import org.apache.jena.atlas.lib.Closeable; +import org.apache.jena.riot.system.PrefixMap; +import org.apache.jena.util.iterator.ClosableIterator; + +public interface RDFIterator + extends ClosableIterator, Closeable, java.io.Closeable, AutoCloseable +{ + /** + * Return the prefixes encountered so far. + * If the iterator is backed by a read-ahead parsing process, prefix changes may + * only become visible as iteration proceeds. + * + * @return + */ + PrefixMap getPrefixes(); + + /** + * Returns true if the most recent call to next()/hasNext() caused a change + * in the prefixes. + * + * @return + */ + boolean prefixesChanged(); + + String getBaseIri(); +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIteratorFromIterator.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIteratorFromIterator.java new file mode 100644 index 000000000..be16cc215 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIteratorFromIterator.java @@ -0,0 +1,54 @@ +package org.aksw.jena_sparql_api.rx; + +import org.apache.jena.atlas.iterator.IteratorResourceClosing; +import org.apache.jena.riot.system.PrefixMap; +import org.apache.jena.riot.system.PrefixMapFactory; + +public class RDFIteratorFromIterator + implements RDFIterator +{ + protected IteratorResourceClosing delegate; + protected String baseIri; + protected PrefixMap prefixMap; + + public RDFIteratorFromIterator(IteratorResourceClosing delegate, String baseIri) { + this(delegate, baseIri, PrefixMapFactory.create()); + } + + public RDFIteratorFromIterator(IteratorResourceClosing delegate, String baseIri, PrefixMap prefixMap) { + super(); + this.delegate = delegate; + this.baseIri = baseIri; + this.prefixMap = prefixMap; + } + + @Override + public void close() { + delegate.close(); + } + + @Override + public boolean hasNext() { + return delegate.hasNext(); + } + + @Override + public T next() { + return delegate.next(); + } + + @Override + public PrefixMap getPrefixes() { + return prefixMap; + } + + @Override + public boolean prefixesChanged() { + return false; + } + + @Override + public String getBaseIri() { + return baseIri; + } +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIteratorFromPipedRDFIterator.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIteratorFromPipedRDFIterator.java new file mode 100644 index 000000000..4d3711468 100644 --- /dev/null 
+++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/RDFIteratorFromPipedRDFIterator.java @@ -0,0 +1,34 @@ +package org.aksw.jena_sparql_api.rx; + +import org.apache.jena.riot.lang.PipedRDFIterator; + +public class RDFIteratorFromPipedRDFIterator + extends PipedRDFIterator + implements RDFIterator +{ + /** A dirty flag for prefixes */ + protected boolean prefixesChanged = false; + + public RDFIteratorFromPipedRDFIterator(int bufferSize, boolean fair, int pollTimeout, int maxPolls) { + super(bufferSize, fair, pollTimeout, maxPolls); + } + + @Override + protected void prefix(String prefix, String iri) { + super.prefix(prefix, iri); + prefixesChanged = true; + } + + /** + * Returns the status of the dirty flag for prefixes and resets it to false. + * Hence, should this method return true at a future invocation, there has been a change since + * the last invocation. + * + */ + @Override + public boolean prefixesChanged() { + boolean result = prefixesChanged; + prefixesChanged = false; + return result; + } +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/SparqlRx.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/SparqlRx.java index bb9006789..10bbe3554 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/SparqlRx.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/SparqlRx.java @@ -488,6 +488,10 @@ public static Single> fetchCountQueryPartition(SparqlQueryConnection Long xrowLimit = rowLimit == null ? null : rowLimit + 1; Entry countQuery = QueryGenerationUtils.createQueryCountPartition(query, partitionVars, xitemLimit, xrowLimit); +// System.out.println("Given: " + query); +// System.out.println(partitionVars); +// System.out.println("Generated count query: " + countQuery); + Var v = countQuery.getKey(); Query q = countQuery.getValue(); @@ -604,6 +608,20 @@ public static Flowable> execConstructGrouped(Function primaryKeyVars, boolean sortRowsByPartitionVar) { Template template = q.getConstructTemplate(); diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/SparqlScriptProcessor.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/SparqlScriptProcessor.java new file mode 100644 index 000000000..e57fd050e --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/SparqlScriptProcessor.java @@ -0,0 +1,403 @@ +package org.aksw.jena_sparql_api.rx; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.function.Consumer; +import java.util.function.Function; + +import org.aksw.jena_sparql_api.stmt.SparqlQueryParser; +import org.aksw.jena_sparql_api.stmt.SparqlQueryParserImpl; +import org.aksw.jena_sparql_api.stmt.SparqlQueryParserWrapperSelectShortForm; +import org.aksw.jena_sparql_api.stmt.SparqlStmt; +import org.aksw.jena_sparql_api.stmt.SparqlStmtIterator; +import org.aksw.jena_sparql_api.stmt.SparqlStmtMgr; +import org.aksw.jena_sparql_api.stmt.SparqlStmtParser; +import org.aksw.jena_sparql_api.stmt.SparqlStmtParserImpl; +import org.aksw.jena_sparql_api.stmt.SparqlStmtUpdate; +import org.aksw.jena_sparql_api.stmt.SparqlStmtUtils; +import org.aksw.jena_sparql_api.stmt.SparqlUpdateParser; +import org.aksw.jena_sparql_api.stmt.SparqlUpdateParserImpl; +import 
org.aksw.jena_sparql_api.syntax.UpdateRequestUtils; +import org.aksw.jena_sparql_api.utils.NodeUtils; +import org.apache.jena.atlas.web.TypedInputStream; +import org.apache.jena.graph.Node; +import org.apache.jena.query.Dataset; +import org.apache.jena.query.DatasetFactory; +import org.apache.jena.query.Syntax; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFLanguages; +import org.apache.jena.riot.system.PrefixMap; +import org.apache.jena.shared.PrefixMapping; +import org.apache.jena.sparql.core.Prologue; +import org.apache.jena.sparql.modify.request.UpdateLoad; +import org.apache.jena.update.UpdateRequest; +import org.apache.jena.util.SplitIRI; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.StandardSystemProperty; + + +/** + * Super-convenient SPARQL statement loader. Probes arguments to determine whether they are inline SPARQL statements or refer to files. + * Referred files may contain RDF or sequences of SPARQL statements. + * RDF files are loaded fully into memory as UpdateModify statements. + * + * Usually the SparqlQueryParserWrapperSelectShortForm should be active, which allows omitting the SELECT keyword, making + * querying even less verbose. + * + * Prefixes from an input source carry over to the next. Hence, if an RDF file is loaded, its prefixes can be used in + * subsequent SPARQL statements without the need for declaration. + * + * + * For example, assuming that mydata defines a foo prefix, + * ad-hoc querying becomes possible simply by using the arguments ["people.ttl", "?s { ?s a foaf:Person }"] + * + * Arguments of the form cwd=/some/path set the current working directory on which SPARQL queries operate. + * Effectively it sets the base URL of the following SPARQL queries. + * Relative paths are resolved against the current working directory as reported by the JVM. + * Use the bare argument "cwd" (without a value) to reset the CWD to that of the JVM + * + * @author raven + * + */ +public class SparqlScriptProcessor { + + public static class Provenance { + public Provenance(String arg) { + this(arg, null, null); + } + + public Provenance(String argStr, Long line, Long column) { + super(); + this.argStr = argStr; + this.line = line; + this.column = column; + this.sparqlPath = ""; + } + + + // Non-null if the query originated from a SPARQL file + protected String sparqlPath; + /** + * The original argument string + */ + protected String argStr; + + protected Long line; + + protected Long column; + + public String getSparqlPath() { + return sparqlPath; + } + + public void setSparqlPath(String sparqlPath) { + this.sparqlPath = sparqlPath; + } + + @Override + public String toString() { + String result = argStr + + (line == null ? (column == null ? "" : ":") : ":" + line) + + (column == null ? 
"" : ":" + column); + return result; + } + } + + public static final String cwdKey = "cwd="; + public static final String cwdResetCwd = "cwd"; + + private static final Logger logger = LoggerFactory.getLogger(SparqlScriptProcessor.class); + + protected SparqlStmtParser sparqlParser ; + protected PrefixMapping globalPrefixes; + protected Path cwd = null; + protected List> sparqlStmts = new ArrayList<>(); + protected List> postTransformers = new ArrayList<>(); + + public SparqlScriptProcessor(SparqlStmtParser sparqlParser, PrefixMapping globalPrefixes) { + super(); + this.sparqlParser = sparqlParser; + this.globalPrefixes = globalPrefixes; + } + + public void addPostTransformer(Function transformer) { + postTransformers.add(transformer); + } + + public void addPostMutator(Consumer mutator) { + postTransformers.add(stmt -> { mutator.accept(stmt); return stmt; }); + } + + public List> getSparqlStmts() { + return sparqlStmts; + } + + public SparqlStmtParser getSparqlParser() { + return sparqlParser; + } + + + /** + * Create a script processor that substitutes references to environment variables + * with the appropriate values. + * + * @param pm + * @return + */ + public static SparqlScriptProcessor createWithEnvSubstitution(PrefixMapping pm) { + Prologue p = new Prologue(pm); + SparqlQueryParser queryParser = SparqlQueryParserWrapperSelectShortForm.wrap( + SparqlQueryParserImpl.create(Syntax.syntaxARQ, p)); + + SparqlUpdateParser updateParser = SparqlUpdateParserImpl + .create(Syntax.syntaxARQ, new Prologue(p)); + + SparqlStmtParser sparqlParser = + SparqlStmtParser.wrapWithTransform( + new SparqlStmtParserImpl(queryParser, updateParser, false), + stmt -> SparqlStmtUtils.applyNodeTransform(stmt, x -> NodeUtils.substWithLookup(x, System::getenv))); + + SparqlScriptProcessor result = new SparqlScriptProcessor(sparqlParser, pm); + return result; + } + + + + + public void process(List filenames) { + for (String filename : filenames) { + process(filename); + } + } + + public void process(String filename) { + process(filename, sparqlStmts); + } + + public void process(String filename, List> result) { + logger.info("Processing argument '" + filename + "'"); + + if(filename.startsWith(cwdKey)) { + String cwdValue = filename.substring(cwdKey.length()).trim(); + + if(cwd == null) { + cwd = Paths.get(StandardSystemProperty.USER_DIR.value()); + } + + cwd = cwd.resolve(cwdValue); + logger.info("Pinned working directory to " + cwd); + } else if(filename.equals(cwdResetCwd)) { + // If cwdValue is an empty string, reset the working directory + logger.info("Unpinned working directory"); + + cwd = null; + } else { + + boolean isProcessed = false; + try { + Provenance prov = new Provenance(filename); + UpdateRequest ur = tryLoadFileAsUpdateRequest(filename, globalPrefixes); + result.add(new SimpleEntry<>(new SparqlStmtUpdate(ur), prov)); + + isProcessed = true; + } catch (Exception e) { + logger.debug("Probing " + filename + " as RDF data file failed", e); + } + + if(!isProcessed) { + + String baseIri = cwd == null ? null : cwd.toUri().toString(); + try { + Iterator it = SparqlStmtMgr.loadSparqlStmts(filename, globalPrefixes, sparqlParser, baseIri); + + if(it != null) { + //Path sparqlPath = Paths.get(filename).toAbsolutePath(); + String sparqlFileName = SplitIRI.localname(filename); + + + SparqlStmtIterator itWithPos = it instanceof SparqlStmtIterator + ? 
(SparqlStmtIterator)it + : null; + + while(it.hasNext()) { + Provenance prov; + if(itWithPos != null) { + prov = new Provenance(filename, (long)itWithPos.getLine(), (long)itWithPos.getColumn()); + logger.info("Preparing SPARQL statement at line " + itWithPos.getLine() + ", column " + itWithPos.getColumn()); + } else { + prov = new Provenance(filename); + logger.info("Preparing inline SPARQL argument " + filename); + } + prov.setSparqlPath(sparqlFileName); + + SparqlStmt stmt = it.next(); + + PrefixMapping stmtPrefixes = stmt.getPrefixMapping(); + if(stmtPrefixes != null) { + globalPrefixes.setNsPrefixes(stmtPrefixes); + } + + // Move optimizePrefixes to transformers? + SparqlStmtUtils.optimizePrefixes(stmt); + + for (Function postTransformer : postTransformers) { + SparqlStmt tmp = postTransformer.apply(stmt); + stmt = Objects.requireNonNull(tmp, "Transformation yielded null: " + postTransformer); + } + + result.add(new SimpleEntry<>(stmt, prov)); + } + } + } catch (Exception e) { + throw new RuntimeException("Failed to process argument " + filename, e); + } + } + } + + } + + + public static UpdateRequest tryLoadFileAsUpdateRequest(String filename, PrefixMapping globalPrefixes) throws IOException { + UpdateRequest result = null; + + // TODO We should map the filename through the stream manager +// String str = StreamManager.get().mapURI(filename); + + // Try as RDF file + try(TypedInputStream tmpIn = RDFDataMgrEx.open(filename, Arrays.asList(Lang.TRIG, Lang.NQUADS))) { +// if(tmpIn == null) { +// throw new FileNotFoundException(filename); +// } + + + // Unwrap the input stream for less overhead + InputStream in = tmpIn.getInputStream(); + + + String contentType = tmpIn.getContentType(); + logger.info("Detected format: " + contentType); + Lang rdfLang = contentType == null ? null : RDFLanguages.contentTypeToLang(contentType); + + //Lang rdfLang = RDFDataMgr.determineLang(filename, null, null); + if(rdfLang != null) { + + RDFIterator itTmp; + // FIXME Validate we are really using turtle/trig here + if(RDFLanguages.isTriples(rdfLang)) { + itTmp = RDFDataMgrEx.createIteratorTriples(globalPrefixes, in, Lang.TTL); + } else if(RDFLanguages.isQuads(rdfLang)) { + itTmp = RDFDataMgrEx.createIteratorQuads(globalPrefixes, in, Lang.TRIG); + } else { + throw new RuntimeException("Unknown lang: " + rdfLang); + } + + + int window = 100; + try (RDFIterator it = itTmp) { + int remaining = window; + while (it.hasNext()) { + --remaining; + if (remaining == 0) { + PrefixMap pm = it.getPrefixes(); + logger.info("Gathered " + pm.size() + " prefixes from " + filename); + globalPrefixes.setNsPrefixes(pm.getMapping()); + break; + } + + if (it.prefixesChanged()) { + remaining = window; + } + + it.next(); + } + } + + // String fileUrl = "file://" + Paths.get(filename).toAbsolutePath().normalize().toString(); + result = new UpdateRequest(new UpdateLoad(filename, (Node)null)); + } + } + return result; + } + + + public static UpdateRequest tryLoadFileAsUpdateRequestOld(String filename, PrefixMapping globalPrefixes) throws IOException { + UpdateRequest result = null; + + // Try as RDF file + try(TypedInputStream tmpIn = RDFDataMgrEx.open(filename, Arrays.asList(Lang.TRIG, Lang.NQUADS))) { +// if(tmpIn == null) { +// throw new FileNotFoundException(filename); +// } + + InputStream in = tmpIn.getInputStream(); + + + String contentType = tmpIn.getContentType(); + logger.info("Detected format: " + contentType); + Lang rdfLang = contentType == null ? 
null : RDFLanguages.contentTypeToLang(contentType); + + //Lang rdfLang = RDFDataMgr.determineLang(filename, null, null); + if(rdfLang != null) { + + if(RDFLanguages.isTriples(rdfLang)) { + + Model tmp = ModelFactory.createDefaultModel(); + //InputStream in = SparqlStmtUtils.openInputStream(filename); + // FIXME Validate we are really using turtle here + RDFDataMgrEx.parseTurtleAgainstModel(tmp, globalPrefixes, in); + // Copy any prefixes from the parse back to our global prefix mapping + globalPrefixes.setNsPrefixes(tmp); + + // Convert the model to a SPARQL insert statement + result = UpdateRequestUtils.createUpdateRequest(tmp, null); + + } else if(RDFLanguages.isQuads(rdfLang)) { + Dataset tmp = DatasetFactory.create(); + // InputStream in = SparqlStmtUtils.openInputStream(filename); + + // FIXME Validate we are really using turtle here + RDFDataMgrEx.parseTrigAgainstDataset(tmp, globalPrefixes, in); + // Copy any prefixes from the parse back to our global prefix mapping + + Model m = tmp.getDefaultModel(); + if(m != null) { + globalPrefixes.setNsPrefixes(m); + } + + logger.info("Gathering prefixes from named graphs..."); + int i = 0; + Iterator it = tmp.listNames(); + while(it.hasNext()) { + String name = it.next(); + m = tmp.getNamedModel(name); + if(m != null) { + ++i; + globalPrefixes.setNsPrefixes(m); + } + } + logger.info("Gathered prefixes from " + i + " named graphs"); + + result = UpdateRequestUtils.createUpdateRequest(tmp, null); + + } else { + throw new RuntimeException("Unknown lang: " + rdfLang); + } + + } + } + return result; + } +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/TestPartitionedQueryRx.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/TestPartitionedQueryRx.java new file mode 100644 index 000000000..8a1f31131 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/TestPartitionedQueryRx.java @@ -0,0 +1,54 @@ +package org.aksw.jena_sparql_api.rx; + +import java.util.List; + +import org.aksw.jena_sparql_api.common.DefaultPrefixes; +import org.aksw.jena_sparql_api.stmt.SparqlQueryParserImpl; +import org.apache.jena.query.Dataset; +import org.apache.jena.query.Query; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdfconnection.RDFConnection; +import org.apache.jena.rdfconnection.RDFConnectionFactory; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.RDFFormat; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sys.JenaSystem; + +//public class TestPartitionedQueryRx { +// public static void main(String[] args) { +// JenaSystem.init(); +// +// String queryStr = +// "CONSTRUCT {\n" + +// " ?pub dct:creator ?auts .\n" + +// " ?ln rdf:first ?f ; rdf:rest ?r . 
# ln = list node\n" + +// "} {\n" + +// " ?pub dct:creator ?auts .\n" + +// " ?auts rdf:rest* ?ln .\n" + +// " ?ln rdf:first ?f ; rdf:rest ?r .\n" + +// "}"; +// +// Query standardQuery = SparqlQueryParserImpl +// .create(DefaultPrefixes.prefixes) +// .apply(queryStr); +// +// EntityQueryImpl rootedQuery = new EntityQueryImpl(); +// rootedQuery.setPartitionSelectorQuery(standardQuery); +// +// Var rootNode = Var.alloc("pub"); +// rootedQuery.getDirectGraphPartition().getEntityNodes().add(rootNode); +// rootedQuery.getPartitionVars().add(rootNode); +// +// Dataset ds = RDFDataMgr.loadDataset("https://raw.githubusercontent.com/Aklakan/aklakans-devblog/master/2020-10-20-rdflist/src/main/resources/publications.ttl"); +// +// try (RDFConnection conn = RDFConnectionFactory.connect(ds)) { +// List rdfNodes = EntityQueryRx.execConstructRooted(conn, rootedQuery).toList().blockingGet(); +// +// for (RDFNode rdfNode : rdfNodes) { +// System.out.println("Got node: " + rdfNode + ": vvv"); +// RDFDataMgr.write(System.out, rdfNode.getModel(), RDFFormat.TURTLE_BLOCKS); +// } +// } +// +// } +//} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/engine/EntityQueryFactory.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/engine/EntityQueryFactory.java new file mode 100644 index 000000000..cefa09076 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/engine/EntityQueryFactory.java @@ -0,0 +1,11 @@ +package org.aksw.jena_sparql_api.rx.entity.engine; + +import org.aksw.jena_sparql_api.concepts.Concept; +import org.aksw.jena_sparql_api.rx.EntityBaseQuery; + +public class EntityQueryFactory { + public EntityBaseQuery createFromConcept(Concept concept) { + EntityBaseQuery result = EntityBaseQuery.create(concept.getVar(), concept.asQuery()); + return result; + } +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/engine/EntityQueryRx.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/engine/EntityQueryRx.java new file mode 100644 index 000000000..3318c62dc --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/engine/EntityQueryRx.java @@ -0,0 +1,1457 @@ +package org.aksw.jena_sparql_api.rx.entity.engine; + +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Random; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.LongStream; +import java.util.stream.Stream; + +import org.aksw.commons.collections.SetUtils; +import org.aksw.commons.collections.generator.Generator; +import org.aksw.jena_sparql_api.mapper.Accumulator; +import org.aksw.jena_sparql_api.mapper.Aggregator; +import org.aksw.jena_sparql_api.rx.AggCollection; +import org.aksw.jena_sparql_api.rx.AggObjectGraph; +import org.aksw.jena_sparql_api.rx.AggObjectGraph.AccObjectGraph; +import org.aksw.jena_sparql_api.rx.EntityBaseQuery; +import org.aksw.jena_sparql_api.rx.EntityGraphFragment; +import org.aksw.jena_sparql_api.rx.ExprTransformAllocAggregate; +import org.aksw.jena_sparql_api.rx.SparqlRx; +import org.aksw.jena_sparql_api.rx.entity.model.EntityQueryBasic; +import org.aksw.jena_sparql_api.rx.entity.model.EntityQueryImpl; +import 
org.aksw.jena_sparql_api.rx.entity.model.EntityTemplate; +import org.aksw.jena_sparql_api.rx.entity.model.EntityTemplateImpl; +import org.aksw.jena_sparql_api.rx.entity.model.ExprListEval; +import org.aksw.jena_sparql_api.rx.entity.model.GraphPartitionJoin; +import org.aksw.jena_sparql_api.rx.entity.model.GraphPartitionWithEntities; +import org.aksw.jena_sparql_api.rx.op.OperatorOrderedGroupBy; +import org.aksw.jena_sparql_api.utils.ElementUtils; +import org.aksw.jena_sparql_api.utils.NodeTransformRenameMap; +import org.aksw.jena_sparql_api.utils.QuadPatternUtils; +import org.aksw.jena_sparql_api.utils.QueryUtils; +import org.aksw.jena_sparql_api.utils.VarExprListUtils; +import org.aksw.jena_sparql_api.utils.VarGeneratorBlacklist; +import org.aksw.jena_sparql_api.utils.VarGeneratorImpl2; +import org.aksw.jena_sparql_api.utils.VarUtils; +import org.apache.jena.ext.com.google.common.collect.Iterables; +import org.apache.jena.ext.com.google.common.collect.Maps; +import org.apache.jena.ext.com.google.common.collect.Multimap; +import org.apache.jena.ext.com.google.common.collect.MultimapBuilder; +import org.apache.jena.ext.com.google.common.collect.Sets; +import org.apache.jena.ext.com.google.common.collect.Streams; +import org.apache.jena.ext.com.google.common.hash.Hashing; +import org.apache.jena.ext.com.google.common.io.BaseEncoding; +import org.apache.jena.graph.Graph; +import org.apache.jena.graph.Node; +import org.apache.jena.graph.NodeFactory; +import org.apache.jena.graph.Triple; +import org.apache.jena.query.Query; +import org.apache.jena.query.ResultSet; +import org.apache.jena.query.SortCondition; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdfconnection.SparqlQueryConnection; +import org.apache.jena.sparql.algebra.Table; +import org.apache.jena.sparql.algebra.TableFactory; +import org.apache.jena.sparql.core.BasicPattern; +import org.apache.jena.sparql.core.Quad; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.core.VarExprList; +import org.apache.jena.sparql.engine.binding.Binding; +import org.apache.jena.sparql.engine.binding.BindingFactory; +import org.apache.jena.sparql.expr.Expr; +import org.apache.jena.sparql.expr.ExprList; +import org.apache.jena.sparql.expr.ExprTransformer; +import org.apache.jena.sparql.expr.ExprVar; +import org.apache.jena.sparql.expr.ExprVars; +import org.apache.jena.sparql.expr.NodeValue; +import org.apache.jena.sparql.graph.GraphFactory; +import org.apache.jena.sparql.graph.NodeTransform; +import org.apache.jena.sparql.syntax.Element; +import org.apache.jena.sparql.syntax.ElementGroup; +import org.apache.jena.sparql.syntax.ElementOptional; +import org.apache.jena.sparql.syntax.ElementSubQuery; +import org.apache.jena.sparql.syntax.PatternVars; +import org.apache.jena.sparql.syntax.Template; +import org.apache.jena.sparql.util.ExprUtils; +import org.apache.jena.sparql.util.ModelUtils; +import org.apache.jena.util.iterator.ExtendedIterator; + +import io.reactivex.rxjava3.core.Flowable; +import io.reactivex.rxjava3.core.FlowableTransformer; + +/** + * Methods for the execution of {@link EntityQueryBasic}s + * + * @author raven + * + */ +public class EntityQueryRx { + + public static Flowable execConstructEntities( + SparqlQueryConnection conn, + EntityQueryImpl query) { + + return execConstructEntities( + conn, query, + GraphFactory::createDefaultGraph); + } + + public static Flowable execConstructEntities( + 
SparqlQueryConnection conn,
+            EntityQueryImpl query,
+            Supplier<Graph> graphSupplier) {
+
+        return execConstructEntities(
+                conn, query,
+                graphSupplier, EntityQueryRx::defaultEvalToNode);
+    }
+
+    public static Flowable<RDFNode> execConstructEntities(
+            SparqlQueryConnection conn,
+            EntityQueryImpl queryEx,
+            Supplier<Graph> graphSupplier,
+            ExprListEval exprListEval) {
+
+        EntityQueryBasic basicEntityQuery = assembleEntityAndAttributeParts(queryEx);
+        return execConstructEntities(conn, basicEntityQuery, graphSupplier, exprListEval);
+    }
+
+    public static Flowable<Quad> execConstructEntitiesNg(
+            SparqlQueryConnection conn,
+            EntityQueryImpl query) {
+
+        return execConstructEntitiesNg(
+                conn, query,
+                GraphFactory::createDefaultGraph);
+    }
+
+    public static Flowable<Quad> execConstructEntitiesNg(
+            SparqlQueryConnection conn,
+            EntityQueryImpl query,
+            Supplier<Graph> graphSupplier) {
+
+        return execConstructEntitiesNg(
+                conn, query,
+                graphSupplier, EntityQueryRx::defaultEvalToNode);
+    }
+
+    public static Flowable<Quad> execConstructEntitiesNg(
+            SparqlQueryConnection conn,
+            EntityQueryImpl queryEx,
+            Supplier<Graph> graphSupplier,
+            ExprListEval exprListEval) {
+
+        EntityQueryBasic basicEntityQuery = assembleEntityAndAttributeParts(queryEx);
+        return execConstructEntitiesNg(conn, basicEntityQuery, graphSupplier, exprListEval);
+        //return execConstructPartitionedOld(conn, assembledQuery, graphSupplier, exprListEval);
+    }
+
+    /** Execute a partitioned query.
+     * See {@link #execConstructEntities(SparqlQueryConnection, EntityQueryBasic, Supplier, ExprListEval)} */
+    public static Flowable<RDFNode> execConstructRooted(SparqlQueryConnection conn, EntityQueryBasic query) {
+        return execConstructRooted(
+                conn, query,
+                GraphFactory::createDefaultGraph);
+    }
+
+    /** Execute a partitioned query.
+     * See {@link #execConstructEntities(SparqlQueryConnection, EntityQueryBasic, Supplier, ExprListEval)} */
+    public static Flowable<RDFNode> execConstructRooted(SparqlQueryConnection conn, EntityQueryBasic query,
+            Supplier<Graph> graphSupplier) {
+        return execConstructEntities(
+                conn, query,
+                graphSupplier, EntityQueryRx::defaultEvalToNode);
+    }
+
+    /**
+     * Execute a partitioned SPARQL SELECT query such as
+     * SELECT ?s ?p ?o { ?s ?p ?o } PARTITION BY ?s
+     *
+     * All bindings that bind the partition variables to the same values are returned as a group in
+     * a {@link Table}.
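+     *
+     * A minimal usage sketch (hedged: {@code conn} and the variable names are illustrative,
+     * not part of this API):
+     * <pre>{@code
+     * // Assumes an existing SparqlQueryConnection 'conn'
+     * Query q = QueryFactory.create("SELECT ?s ?p ?o { ?s ?p ?o }");
+     * List<Var> partitionVars = Collections.singletonList(Var.alloc("s"));
+     * EntityQueryRx.execSelectPartitioned(conn, q, partitionVars)
+     *     .blockingForEach(e -> System.out.println(e.getKey() + ": " + e.getValue().size() + " rows"));
+     * }</pre>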
+ * + * @param conn The connection on which to run the query + * @param query + * @return A flowable of pairs of key bindings and tables + */ +// public static Flowable> execSelectPartitioned( +// SparqlQueryConnection conn, +// EntityQueryBasic query) { +// +// Query standardQuery = query.getPartitionSelectorQuery(); +// +// return execSelectPartitioned(conn, standardQuery, query.getPartitionVars()); +// } + + /** + * Create a transformed copy of the query where all variables that + * need to join have the same name whereas variables that are not + * supposed to join have been remapped + * + * + * @param query + * @return + */ + public static EntityQueryImpl alignVariables(EntityQueryImpl query) { + EntityQueryImpl result = new EntityQueryImpl(); + result.setBaseQuery(query.getBaseQuery()); + + Set sourceVars = QueryUtils.mentionedVars(result.getBaseQuery().getStandardQuery()); + List sourceJoinVars = result.getBaseQuery().getPartitionVars(); + + Generator varGen = VarGeneratorImpl2.create(); + + for (GraphPartitionJoin join : query.getMandatoryJoins()) { + + EntityGraphFragment egm = join.getEntityGraphFragment(); + + Set targetVars = ElementUtils.getVarsMentioned(egm.getElement()); + List targetJoinVars = egm.getPartitionVars(); + + // Create a var mapping that joins on the partition vars without + // causing a clash on any other var + Map varMap = VarUtils.createJoinVarMap( + sourceVars, targetVars, sourceJoinVars, targetJoinVars, varGen); + + // Add any newly allocated variables in the varMap to the source vars for the next iteration + // as to prevent accidental joins on the already encountered variables + sourceVars.addAll(varMap.values()); + + NodeTransform nodeTransform = new NodeTransformRenameMap(varMap); + GraphPartitionJoin newJoin = join.applyNodeTransform(nodeTransform); + + result.getMandatoryJoins().add(newJoin); + } + + // TODO Get rid of code duplication + + for (GraphPartitionJoin join : query.getOptionalJoins()) { + + EntityGraphFragment egm = join.getEntityGraphFragment(); + + Set targetVars = ElementUtils.getVarsMentioned(egm.getElement()); + List targetJoinVars = egm.getPartitionVars(); + + // Create a var mapping that joins on the partition vars without + // causing a clash on any other var + Map varMap = VarUtils.createJoinVarMap( + sourceVars, targetVars, sourceJoinVars, targetJoinVars, varGen); + + // Add any newly allocated variables in the varMap to the source vars for the next iteration + // as to prevent accidental joins on the already encountered variables + sourceVars.addAll(varMap.values()); + + NodeTransform nodeTransform = new NodeTransformRenameMap(varMap); + GraphPartitionJoin newJoin = join.applyNodeTransform(nodeTransform); + + result.getOptionalJoins().add(newJoin); + } + return result; + } + + public static EntityQueryBasic assembleEntityAndAttributeParts(EntityQueryImpl queryRaw) { + + EntityQueryImpl queryTmp = alignVariables(queryRaw); + EntityQueryImpl query = mergeFetchGroups(queryTmp); + +// Query baseQuery = query.getBaseQuery().getStandardQuery(); +// List partitionVars = query.getBaseQuery().getPartitionVars(); + +// List partitionOrderBy = queryRaw.getBaseQuery().getPartitionOrderBy(); + +// boolean needsSubSelect = !(partitionOrderBy == null || partitionOrderBy.isEmpty()) +// || baseQuery.hasLimit() +// || baseQuery.hasOffset(); + +// List combinedFilter = new ArrayList<>(); +// //List combinedAttributes = new ArrayList<>(); +// combinedFilter.add(baseQuery.getQueryPattern()); +// +// for (GraphPartitionJoin gp : 
query.getAuxiliaryGraphPartitions()) { +// Element elt = gp.getEntityGraphFragment().getElement(); +// if (!(elt instanceof ElementOptional)) { +// // Make the element join with the partition variables +// combinedFilter.add(elt); +// } +// } +// +// System.out.println("Filter " + combinedFilter); +// +// +// for (GraphPartitionJoin gp : query.getAuxiliaryGraphPartitions()) { +// String lfgn = gp.getLazyFetchGroupName(); +//// System.out.println("Fetch group: " + lfgn); +//// System.out.println("CONSTRUCT " + gp.getEntityTemplate().getTemplate().getBGP()); +//// System.out.println("WHERE " + gp.getElement()); +// System.out.println("----"); +// } + + GraphPartitionJoin join = Iterables.getFirst(query.getMandatoryJoins(), + new GraphPartitionJoin( + EntityGraphFragment.empty(query.getBaseQuery().getPartitionVars()))); + + GraphPartitionJoin optional = Iterables.getFirst(query.getOptionalJoins(), + new GraphPartitionJoin( + EntityGraphFragment.empty(query.getBaseQuery().getPartitionVars()))); + + EntityQueryBasic result = new EntityQueryBasic(); + result.setBaseQuery(queryRaw.getBaseQuery()); + result.setAttributeFragment(join.getEntityGraphFragment()); + result.setOptionalAttributeFragment(optional.getEntityGraphFragment()); + + +// if (true) throw new RuntimeException("implement me"); + + return result; + } + + + /** + * Merges all graph partitions with the same fetch group name into a single + * graph partition. + * + * @param queryRaw + * @return + */ + public static EntityQueryImpl mergeFetchGroups(EntityQueryImpl query) { + + Query baseQuery = query.getBaseQuery().getStandardQuery(); + List partitionVars = query.getBaseQuery().getPartitionVars(); + + // First group all graph partitions by name + // Then merge their templates and patterns into a single one + Multimap rawFetchGroups = MultimapBuilder.hashKeys().arrayListValues().build(); + Multimap rawOptionalFetchGroups = MultimapBuilder.hashKeys().arrayListValues().build(); + + // todo add the direct graph partition to the fetch group if a subSelect is needed + // query.getDirectGraphPartition() + + + List combinedFilter = new ArrayList<>(); + //List combinedAttributes = new ArrayList<>(); + combinedFilter.add(baseQuery.getQueryPattern()); + + for (GraphPartitionJoin join : query.getMandatoryJoins()) { + List elts = ElementUtils.toElementList(join.getEntityGraphFragment().getElement()); + Element elt = ElementUtils.groupIfNeeded(elts); + String lfgn = join.getLazyFetchGroupName(); + // Make the element join with the partition variables + combinedFilter.add(elt); + + rawFetchGroups.put(lfgn, join); + } + + for (GraphPartitionJoin join : query.getOptionalJoins()) { + List elts = ElementUtils.toElementList(join.getEntityGraphFragment().getElement()); + Element elt = ElementUtils.groupIfNeeded(elts); + String lfgn = join.getLazyFetchGroupName(); + rawOptionalFetchGroups.put(lfgn, join); + } + + + Collection fetchGroups = new ArrayList<>(); + for (Entry> e : rawFetchGroups.asMap().entrySet()) { + String groupName = e.getKey(); + if (e.getValue().isEmpty()) { + continue; + } + GraphPartitionJoin newGp = merge(groupName, partitionVars, e.getValue(), false); + fetchGroups.add(newGp); + } + + Collection optionalFetchGroups = new ArrayList<>(); + for (Entry> e : rawOptionalFetchGroups.asMap().entrySet()) { + String groupName = e.getKey(); + if (e.getValue().isEmpty()) { + continue; + } + + GraphPartitionJoin newGp = merge(groupName, partitionVars, e.getValue(), true); + optionalFetchGroups.add(newGp); + } + + + EntityQueryImpl result = new 
EntityQueryImpl();
+        result.setBaseQuery(query.getBaseQuery());
+        result.getMandatoryJoins().addAll(fetchGroups);
+        result.getOptionalJoins().addAll(optionalFetchGroups);
+
+        return result;
+    }
+
+    public static GraphPartitionJoin merge(
+            String groupName,
+            List<Var> partitionVars,
+            Collection<GraphPartitionJoin> gps,
+            boolean isOptional) {
+
+        Element newElement = ElementUtils.groupIfNeeded(gps.stream()
+            .map(GraphPartitionJoin::getEntityGraphFragment)
+            .map(EntityGraphFragment::getElement)
+            .map(ElementUtils::toElementList)
+            .map(list -> isOptional
+                    ? Collections.singletonList(new ElementOptional(ElementUtils.groupIfNeeded(list)))
+                    : list)
+            .flatMap(Collection::stream)
+            .collect(Collectors.toList()));
+
+        BasicPattern bgp = new BasicPattern();
+        gps.stream()
+            .map(gp -> gp.getEntityGraphFragment().getEntityTemplate().getTemplate().getBGP())
+            .forEach(bgp::addAll);
+        Template newTemplate = new Template(bgp);
+
+        List<Node> newEntityNodes = gps.stream()
+            .map(GraphPartitionJoin::getEntityGraphFragment)
+            .map(EntityGraphFragment::getEntityTemplate)
+            .map(EntityTemplate::getEntityNodes)
+            .flatMap(Collection::stream)
+            .distinct()
+            .collect(Collectors.toList());
+
+        Map<Node, ExprList> newBnodeIdMapping = gps.stream()
+            .map(GraphPartitionJoin::getEntityGraphFragment)
+            .map(EntityGraphFragment::getEntityTemplate)
+            .map(EntityTemplate::getBnodeIdMapping)
+            .map(Map::entrySet)
+            .flatMap(Collection::stream)
+            .collect(Collectors.toMap(Entry::getKey, Entry::getValue));
+
+        EntityTemplate newEntityTemplate = new EntityTemplateImpl(
+                newEntityNodes, newTemplate, newBnodeIdMapping);
+
+        EntityGraphFragment newFragment = new EntityGraphFragment(partitionVars, newEntityTemplate, newElement);
+
+        List<Var> parentJoinVars = null; // TODO Handle in the future
+        List<GraphPartitionJoin> subJoins = null;
+        GraphPartitionJoin result = new GraphPartitionJoin(newFragment, parentJoinVars, groupName, subJoins);
+        return result;
+    }
+
+    /**
+     * Execute a SPARQL select query and partition its result set by the given partition
+     * variables.
+     *
+     * @param conn
+     * @param selectQuery
+     * @param partitionVars
+     * @return
+     */
+    public static Flowable<Entry<Binding, Table>> execSelectPartitioned(
+            SparqlQueryConnection conn,
+            Query selectQuery,
+            List<Var> partitionVars) {
+
+        if (!selectQuery.isSelectType()) {
+            throw new RuntimeException("Query must be of select type");
+        }
+
+        Function<Binding, Binding> bindingToKey = SparqlRx.createGrouper(partitionVars, false);
+
+        Aggregator<Binding, Table> aggregator = new AggCollection<>(
+                TableFactory::create,
+                Function.identity(),
+                Table::addBinding
+        );
+
+        Flowable<Entry<Binding, Table>> result = SparqlRx
+            // For future reference: If we get an empty result by using the query object, we probably have wrapped a variable with NodeValue.makeNode.
+ .execSelectRaw(() -> conn.query(selectQuery)) + .compose(aggregateConsecutiveItemsWithSameKey(bindingToKey, aggregator)); + + return result; + } + + + public static Flowable execConstructPartitionedOld( + SparqlQueryConnection conn, + EntityQueryImpl queryEx, + Supplier graphSupplier, + ExprListEval exprListEval) { + + EntityQueryBasic assembledQuery = assembleEntityAndAttributeParts(queryEx); + + return execConstructPartitionedOld(conn, assembledQuery, graphSupplier, exprListEval); + } + + public static Flowable execConstructPartitionedOld( + SparqlQueryConnection conn, + EntityQueryBasic queryEx, + Supplier graphSupplier, + ExprListEval exprListEval) { + +// Model entitySortModel = ModelFactory.createDefaultModel(); + EntityQueryProcessed tmp = processEntityQuery(queryEx, false, graphSupplier, exprListEval); + + return execQueryActual(conn, tmp.partitionVars, tmp.trackedTemplateNodes, tmp.selectQuery, tmp.tableToGraph); + } + + public static class EntityQueryProcessed { + protected List partitionVars; + protected Query selectQuery; + protected Set trackedTemplateNodes; + protected Function tableToGraph; + + public EntityQueryProcessed(List partitionVars, Query selectQuery, Set trackedTemplateNodes, + Function tableToGraph) { + super(); + this.partitionVars = partitionVars; + this.selectQuery = selectQuery; + this.trackedTemplateNodes = trackedTemplateNodes; + this.tableToGraph = tableToGraph; + } + + public Query getInnerSelect() { + ElementGroup grp = (ElementGroup)(selectQuery.getQueryPattern()); + ElementSubQuery subQueryElt = (ElementSubQuery)grp.get(0); + Query result = subQueryElt.getQuery(); + + return result; + } + + public List getPartitionVars() { + return partitionVars; + } + + public Query getSelectQuery() { + return selectQuery; + } + + public Set getTrackedTemplateNodes() { + return trackedTemplateNodes; + } + + public Function getTableToGraph() { + return tableToGraph; + } + + + } + + public static EntityQueryProcessed processEntityQuery( + EntityQueryBasic queryEx, + boolean forceSubSelect) { + return processEntityQuery(queryEx, forceSubSelect, GraphFactory::createDefaultGraph, EntityQueryRx::defaultEvalToNode); + } + + /** + * Execute a CONSTRUCT query using partitions. 
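+     *
+     * This method only rewrites the query and prepares the table-to-graph mapping; the
+     * actual execution is done by {@link #execQueryActual}. A hedged sketch of the wiring
+     * (it mirrors {@code execConstructPartitionedOld}; {@code conn} and {@code queryEx}
+     * are assumed to exist):
+     * <pre>{@code
+     * EntityQueryProcessed p = processEntityQuery(queryEx, false,
+     *         GraphFactory::createDefaultGraph, EntityQueryRx::defaultEvalToNode);
+     * Flowable<GraphPartitionWithEntities> partitions = execQueryActual(
+     *         conn, p.getPartitionVars(), p.getTrackedTemplateNodes(),
+     *         p.getSelectQuery(), p.getTableToGraph());
+     * }</pre>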
+     *
+     * @param queryEx
+     * @param forceSubSelect
+     * @param graphSupplier
+     * @param exprListEval
+     * @return
+     */
+    public static EntityQueryProcessed processEntityQuery(
+            EntityQueryBasic queryEx,
+            boolean forceSubSelect,
+            Supplier<Graph> graphSupplier,
+            ExprListEval exprListEval) {
+
+        EntityBaseQuery baseQuery = queryEx.getBaseQuery();
+
+        EntityTemplate directTemplate = baseQuery.getEntityTemplate();
+        EntityTemplate attributeTemplate = queryEx.getAttributeFragment().getEntityTemplate();
+        EntityTemplate optionalTemplate = queryEx.getOptionalAttributeFragment().getEntityTemplate();
+
+        // Combine the direct and attribute templates
+//        Map varMap = VarUtils.createJoinVarMap(
+//                sourceVars, targetVars, sourceJoinVars, targetJoinVars, varGen);
+
+        EntityTemplate effectiveTemplate = EntityTemplate.merge(
+                directTemplate,
+                attributeTemplate,
+                optionalTemplate);
+
+        Query standardQuery = baseQuery.getStandardQuery();
+
+        // FIXME Check whether this needs subquery wrapping
+        Element filterElement = standardQuery.getQueryPattern();
+
+        //Template template = standardQuery.getConstructTemplate();
+        Template template = effectiveTemplate.getTemplate();
+        Map<Node, ExprList> idMapping = directTemplate.getBnodeIdMapping();
+
+        List<Var> partitionVars = baseQuery.getPartitionVars();
+        List<SortCondition> partitionOrderBy = baseQuery.getPartitionOrderBy();
+
+        Set<Var> essentialProjectVars = getEssentialProjectVars(
+                template, idMapping);
+
+//        Function bindingToRootNodeInst = rootNode == null
+//                ? null
+//                : createKeyFunction(rootNode, idMapping, exprListEval);
+
+//        List entityVars = getEntityVars(rootNode, idMapping);
+
+        Set<Node> trackedTemplateNodes = new LinkedHashSet<>(effectiveTemplate.getEntityNodes());
+
+        Set<Var> blacklist = QueryUtils.mentionedVars(standardQuery);
+        Generator<Var> varGen = VarGeneratorBlacklist.create("sortKey", blacklist);
+
+        // If the direct template is non-empty then extend the attribute element with the selector
+        // (in order to expose the selector's variables as attributes)
+
+        // Conversely, if the attribute element is non-optional then add it to the selector
+
+        Element attributeElement = queryEx.getAttributeFragment().getElement(); //standardQuery.getQueryPattern();
+        Element optionalAttributeElement = queryEx.getOptionalAttributeFragment().getElement();
+
+        boolean needsSubSelect = forceSubSelect
+                || !(partitionOrderBy == null || partitionOrderBy.isEmpty())
+                || standardQuery.hasLimit()
+                || standardQuery.hasOffset();
+
+        // If there is no need for a subselect then just combine filter and attribute
+
+        List<Element> filterElts = ElementUtils.toElementList(filterElement);
+        List<Element> attrElts = ElementUtils.toElementList(attributeElement);
+        List<Element> optAttrElts = ElementUtils.toElementList(optionalAttributeElement);
+
+        Element effectiveFilter = filterElement;
+        Element effectiveAttribute = attributeElement;
+
+        Query selectQuery;
+        if (!needsSubSelect) {
+            effectiveFilter = ElementUtils.groupIfNeeded(Iterables.concat(
+                    filterElts, attrElts, optAttrElts));
+            effectiveAttribute = null;
+
+            standardQuery.setQueryPattern(effectiveFilter);
+
+            selectQuery = preprocessQueryForPartitionWithoutSubSelect(
+                    standardQuery,
+                    partitionVars,
+//                    attributeElement,
+                    essentialProjectVars,
+                    true);
+                    //partitionOrderBy,
+                    //true);
+
+        } else {
+            effectiveFilter = ElementUtils.groupIfNeeded(Iterables.concat(filterElts, attrElts));
+
+            if (!directTemplate.getTemplate().getTriples().isEmpty()) {
+                effectiveAttribute = ElementUtils.groupIfNeeded(Iterables.concat(filterElts, attrElts, optAttrElts));
+            } else {
+                effectiveAttribute = ElementUtils.groupIfNeeded(Iterables.concat(attrElts, optAttrElts));
+            }
+
+            standardQuery.setQueryPattern(effectiveFilter);
+
+            selectQuery = preprocessQueryForPartitionWithSubSelect(
+                    standardQuery,
+                    partitionVars,
+                    effectiveAttribute,
+                    essentialProjectVars,
+                    partitionOrderBy,
+                    varGen);
+        }
+
+        System.err.println(selectQuery);
+
+//        selectQuery = preprocessQueryForPartition(
+//                standardQuery,
+//                partitionVars,
+//                attributeElement,
+//                essentialProjectVars,
+//                partitionOrderBy,
+//                varGen);
+
+        Function<Table, AccObjectGraph> tableToGraph = createTableToGraphMapper(
+                template,
+                trackedTemplateNodes,
+                idMapping,
+                exprListEval,
+                graphSupplier);
+
+        return new EntityQueryProcessed(partitionVars, selectQuery, trackedTemplateNodes, tableToGraph);
+    }
+
+    public static Flowable<GraphPartitionWithEntities> execQueryActual(
+            SparqlQueryConnection conn,
+            List<Var> partitionVars,
+            Set<Node> trackedTemplateNodes,
+            Query selectQuery, Function<Table, AccObjectGraph> tableToGraph) {
+        Flowable<GraphPartitionWithEntities> result = execSelectPartitioned(
+                conn, selectQuery, partitionVars)
+            /*
+            // This map operation sorts the entities based on the ORDER BY sort conditions
+            // but this is not really useful; it e.g. cannot be used to sort a publication's set of authors
+            .map(keyAndTable -> {
+                // Sort the bindings in the table by the sort condition on the entity
+
+                // SELECT ?entityVars { } GROUP BY ?entityVars ORDER BY sort conditions VALUES table
+                Binding key = keyAndTable.getKey();
+                Table table = keyAndTable.getValue();
+
+                Query entitySort = new Query();
+                entitySort.setQuerySelectType();
+                entitySort.setQueryPattern(new ElementData(
+                        table.getVars(),
+                        Lists.newArrayList(table.rows())));
+
+                Generator entityVarGen = VarGeneratorBlacklist.create("entitySortKey", blacklist);
+
+                entitySort = preprocessQueryForPartitionWithSubSelect(entitySort, entityVars, essentialProjectVars, entityOrderBy, entityVarGen);
+
+                System.out.println(entitySort);
+                Table newTable;
+                try (QueryExecution qe = QueryExecutionFactory.create(entitySort, entitySortModel)) {
+                    newTable = resultSetToTable(qe.execSelect());
+                }
+
+                return Maps.immutableEntry(key, newTable);
+            })
+            */
+            .map(keyAndTable -> {
+                Binding partitionKey = keyAndTable.getKey();
+                Table table = keyAndTable.getValue();
+
+                AccObjectGraph acc = tableToGraph.apply(table);
+                Graph graph = acc.getValue();
+                Set<Node> entities = trackedTemplateNodes.stream().map(rootNode -> acc.getTrackedNodes(rootNode))
+                    .flatMap(Collection::stream)
+                    .collect(Collectors.toSet());
+
+                GraphPartitionWithEntities r = new GraphPartitionWithEntities(partitionKey, graph, entities);
+                return r;
+            });
+
+        return result;
+    }
+
+    /**
+     * Execute a CONSTRUCT query w.r.t. partitions. For every partition a graph fragment is constructed
+     * based on bindings that fell into the partition.
+     * In addition, designate all values in that partition that were bound to the node referred to by
+     * {@link EntityQueryBasic#getEntityNode()} as 'roots' of that partition.
+     * Roots serve as designated starting points for traversal of the graph fragment.
+     * Each root is returned as a separate {@link RDFNode} instance that holds a reference
+     * to that partition's graph.
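+     *
+     * A hedged usage sketch (assumes an existing {@code SparqlQueryConnection conn} and a
+     * prepared {@code EntityQueryBasic basicQuery}):
+     * <pre>{@code
+     * List<RDFNode> roots = EntityQueryRx
+     *     .execConstructEntities(conn, basicQuery,
+     *             GraphFactory::createDefaultGraph, EntityQueryRx::defaultEvalToNode)
+     *     .toList().blockingGet();
+     * }</pre>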
+ * + * @param conn + * @param queryEx + * @param graphSupplier + * @param exprListEval + * @return + */ + public static Flowable execConstructEntities( + SparqlQueryConnection conn, + EntityQueryBasic queryEx, + Supplier graphSupplier, + ExprListEval exprListEval) { + + Flowable result = execConstructPartitionedOld(conn, queryEx, graphSupplier, exprListEval) + .flatMap(graphPartition -> Flowable.fromIterable(graphPartition.getRoots()) + .map(node -> { + Graph graph = graphPartition.getGraph(); + Model model = ModelFactory.createModelForGraph(graph); + RDFNode r = ModelUtils.convertGraphNodeToRDFNode(node, model); + return r; + })); + + return result; + } + + public static Flowable execConstructEntitiesNg( + SparqlQueryConnection conn, + EntityQueryBasic queryEx) { + return execConstructEntitiesNg(conn, queryEx, GraphFactory::createDefaultGraph, EntityQueryRx::defaultEvalToNode); + } + + + /** + * Stream the result of an entity query as named graphs + * + * + * @return + */ + public static Flowable execConstructEntitiesNg( + SparqlQueryConnection conn, + EntityQueryBasic queryEx, + Supplier graphSupplier, + ExprListEval exprListEval) { + + Random random = new Random(); + + String namedGraphHash = BaseEncoding.base64Url().encode( + Hashing.sha256().hashLong(random.nextLong()).asBytes()); + + Node hasEntity = NodeFactory.createURI("http://sparql.org/hasEntity"); + + return execConstructPartitionedOld(conn, queryEx, graphSupplier, exprListEval) + .zipWith(LongStream.iterate(0, i -> i + 1)::iterator, SimpleEntry::new) + .flatMap(graphPartitionAndIndex -> { + long index = graphPartitionAndIndex.getValue(); + GraphPartitionWithEntities graphPartition = graphPartitionAndIndex.getKey(); + + Node ngIri = NodeFactory.createURI("urn:sparql-partition:" + namedGraphHash + "-" + index); + + List quads = new ArrayList<>(); + for (Node entityNode : graphPartition.getRoots()) { + Quad q = new Quad(ngIri, ngIri, hasEntity, entityNode); + + quads.add(q); + ExtendedIterator it = graphPartition.getGraph().find(); + while (it.hasNext()) { + Quad quad = new Quad(ngIri, it.next()); + quads.add(quad); + } + } + + return Flowable.fromIterable(quads); + }); + } + + /** + * Util function to yield a mapper from tables to graphs based on the provided + * arguments. + * + */ + public static Function createTableToGraphMapper( + Template template, + Set trackedTemplateNodes, + Map idMapping, + ExprListEval exprListEval, + Supplier graphSupplier) { + + AggObjectGraph graphAgg = createGraphAggregator(template, trackedTemplateNodes, idMapping, exprListEval, graphSupplier); + + return table -> { + AccObjectGraph acc = graphAgg.createAccumulator(); + table.rows().forEachRemaining(acc::accumulate); + //Graph graph = acc.getValue(); + return acc; + }; + } + + + /** + * Based on the information present in {@link EntityQueryBasic} return a function that + * deterministically yields the same node (possibly a blank node) when passing equivalent bindings + * to it. 
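+     *
+     * A hedged sketch of the intended use ({@code root}, {@code idMapping} and
+     * {@code binding} are illustrative):
+     * <pre>{@code
+     * Function<Binding, Node> keyFn = createKeyFunction(root, idMapping, EntityQueryRx::defaultEvalToNode);
+     * Node key = keyFn.apply(binding); // equal bindings yield the same (possibly blank) node
+     * }</pre>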
+     *
+     * @param root
+     * @param idMapping
+     * @param exprListEval
+     * @return
+     */
+    public static Function<Binding, Node> createKeyFunction(
+            Node root,
+            Map<Node, ExprList> idMapping,
+            ExprListEval exprListEval) {
+
+        Function<Binding, Node> result;
+        if (root.isVariable()) {
+            Var rootVar = (Var)root;
+            result = b -> b.get(rootVar);
+        } else if (root.isBlank()) {
+            // The root node must be mapped to ids
+            // TODO Currently the limitation is that the mapping must be a list of vars rather than arbitrary expressions
+            ExprList el = idMapping.get(root);
+            Objects.requireNonNull(el, "blank node as the root must be mapped to id-generating expressions");
+
+            result = b -> exprListEval.eval(el, b);
+        } else {
+            // Case where the root node is a constant;
+            // unlikely to be useful but handled for completeness
+            result = b -> root;
+        }
+
+        return result;
+    }
+
+    public static List<Var> getEntityVars(
+            Node root,
+            Map<Node, ExprList> idMapping) {
+
+        List<Var> result;
+        if (root.isVariable()) {
+            Var rootVar = (Var)root;
+            result = Collections.singletonList(rootVar);
+        } else if (root.isBlank()) {
+            // The root node must be mapped to ids
+            // TODO Currently the limitation is that the mapping must be a list of vars rather than arbitrary expressions
+            ExprList el = idMapping.get(root);
+            Objects.requireNonNull(el, "blank node as the root must be mapped to id-generating expressions");
+
+            Set<Var> vars = new LinkedHashSet<>();
+            ExprVars.varsMentioned(vars, el);
+
+            result = new ArrayList<>(vars);
+        } else {
+            // Case where the root node is a constant;
+            // unlikely to be useful but handled for completeness
+            result = Collections.emptyList();
+        }
+
+        return result;
+    }
+
+    /**
+     * Create an aggregator whose accumulators accumulate graphs from Bindings
+     * w.r.t. the provided mapping information.
+     *
+     * @param template
+     * @param idMap
+     * @param exprListEval
+     * @param graphSupplier
+     * @return
+     */
+    public static AggObjectGraph createGraphAggregator(
+            Template template,
+            Set<Node> trackedTemplateNodes,
+            Map<Node, ExprList> idMap,
+            ExprListEval exprListEval,
+            Supplier<Graph> graphSupplier) {
+
+        Map<Node, Function<Binding, Node>> nodeIdGenMap = idMap.entrySet().stream()
+            .collect(Collectors.toMap(
+                    Entry::getKey,
+                    e -> (binding -> exprListEval.eval(e.getValue(), binding))));
+
+        AggObjectGraph result = new AggObjectGraph(
+                template,
+                trackedTemplateNodes,
+                graphSupplier,
+                nodeIdGenMap);
+
+        return result;
+    }
+
+    /**
+     * One of many ways to always create the same node (in the sense of equals)
+     * from the values obtained by evaluating a list of expressions w.r.t.
+     * a given binding.
+     *
+     * @param exprs
+     * @param binding
+     * @return
+     */
+    public static Node defaultEvalToNode(ExprList exprs, Binding binding) {
+        List<Node> nodes = exprs.getList().stream()
+            .map(expr -> ExprUtils.eval(expr, binding))
+            .map(NodeValue::asNode)
+            .collect(Collectors.toList());
+
+        String label = nodes.toString();
+
+        Node result = NodeFactory.createBlankNode(label);
+        return result;
+    }
+
+    /**
+     * A generic flowable transformer that groups consecutive items that evaluate to the same key.
+     * For every group an accumulator is created that receives the items.
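+     *
+     * A hedged sketch (mirrors the use in {@code execSelectPartitioned} above;
+     * {@code bindings} and {@code partitionVars} are illustrative):
+     * <pre>{@code
+     * Aggregator<Binding, Table> agg = new AggCollection<>(
+     *         TableFactory::create, Function.identity(), Table::addBinding);
+     * Flowable<Entry<Binding, Table>> groups = bindings
+     *     .compose(aggregateConsecutiveItemsWithSameKey(
+     *             SparqlRx.createGrouper(partitionVars, false), agg));
+     * }</pre>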
+     *
+     * @param <T> The incoming item type
+     * @param <KEY> The type of the keys derived from the items
+     * @param <VALUE> The type of the value accumulated from the items
+     * @param itemToKey A function that yields an item's key
+     * @param aggregator An aggregator for computing a value from the set of items with the same key
+     * @return
+     */
+    public static <T, KEY, VALUE> FlowableTransformer<T, Entry<KEY, VALUE>> aggregateConsecutiveItemsWithSameKey(
+            Function<T, KEY> itemToKey,
+            Aggregator<T, VALUE> aggregator) {
+        return upstream -> upstream
+            .lift(OperatorOrderedGroupBy.<T, KEY, Accumulator<T, VALUE>>create(
+                    itemToKey,
+                    groupKey -> aggregator.createAccumulator(),
+                    Accumulator::accumulate))
+            .map(keyAndAcc -> {
+                KEY groupKey = keyAndAcc.getKey();
+                Accumulator<T, VALUE> accGraph = keyAndAcc.getValue();
+
+                VALUE g = accGraph.getValue();
+                return Maps.immutableEntry(groupKey, g);
+            });
+    }
+
+    /**
+     * Return the sets of variables used in the template and the id mapping.
+     *
+     * @param objectQuery
+     * @return
+     */
+//    public static Set getRequiredVars(ObjectQuery query) {
+//        return getEssentialProjectVars(query.getTemplate(), query.getIdMapping());
+//    }
+
+//    public static Set getRequiredVars(QueryEx query) {
+//        return getEssentialProjectVars(query.getConstructTemplate(), query.getIdMapping());
+//    }
+
+//    public static List getExprListVars(ExprList exprs) {
+//        Set result = new LinkedHashSet();
+//        for (Expr exprs : idMapping.values()) {
+//            ExprVars.varsMentioned(result, exprs);
+//        }
+//
+//        return new ArrayList<>(result);
+//    }
+
+    public static Set<Var> getEssentialProjectVars(Template template, Map<Node, ExprList> idMapping) {
+        Set<Var> result = new LinkedHashSet<>();
+
+        for (ExprList exprs : idMapping.values()) {
+            ExprVars.varsMentioned(result, exprs);
+        }
+
+        result.addAll(QuadPatternUtils.getVarsMentioned(template.getQuads()));
+
+        return result;
+    }
+
+    /**
+     * Prepend a given sequence of sort conditions to those
+     * already in the query (if there are already any).
+     * Duplicate sort conditions are removed in the process.
+     *
+     * @param query
+     * @param sortConditions The sort conditions. If null or empty this method becomes a no-op.
+     * @return The input query
+     */
+    public static Query prependToOrderBy(Query query, List<SortCondition> sortConditions) {
+        if (sortConditions != null && !sortConditions.isEmpty()) {
+            Stream<SortCondition> newConditions;
+
+            if (query.hasOrderBy()) {
+                // We need to make a copy using Sets.newLinkedHashSet because we are going to change query.getOrderBy()
+                newConditions = Sets.newLinkedHashSet(Iterables.concat(sortConditions, query.getOrderBy())).stream();
+                query.getOrderBy().clear();
+            } else {
+                newConditions = sortConditions.stream();
+            }
+
+            newConditions.forEach(query::addOrderBy);
+        }
+
+        return query;
+    }
+
+    /** Create sort conditions with the given direction from an iterable of {@link Expr}s */
+    public static List<SortCondition> createSortConditionsFromExprs(Iterable<Expr> exprs, int dir) {
+        List<SortCondition> result = exprs == null
+                ? null
+                : Streams.stream(exprs)
+                    .map(expr -> new SortCondition(expr, dir))
+                    .collect(Collectors.toList());
+        return result;
+    }
+
+    /** Util function to create sort conditions from variables and a direction */
+    public static List<SortCondition> createSortConditionsFromVars(Iterable<Var> vars, int dir) {
+        List<SortCondition> result = vars == null
+                ? 
null + : Streams.stream(vars) + .map(var -> new SortCondition(new ExprVar(var), dir)) + .collect(Collectors.toList()); + return result; + } + +// +// public static Query preprocessQueryForPartition( +// Query baseQuery, +// List partitionVars, +// Element attributeElement, +// Set requiredVars, +// List partitionOrderBy, +// Generator varGenerator) { +// +// boolean needsSubSelect = !(partitionOrderBy == null || partitionOrderBy.isEmpty()) +// || baseQuery.hasLimit() +// || baseQuery.hasOffset(); +// +// Query result = needsSubSelect +// ? preprocessQueryForPartitionWithSubSelect(baseQuery, partitionVars, attributeElement, requiredVars, partitionOrderBy, varGenerator) +// : preprocessQueryForPartitionWithoutSubSelect(baseQuery, partitionVars, attributeElement, requiredVars, true); +// +// System.err.println(result); +// return result; +// } + + + public static Query preprocessQueryForPartitionWithSubSelect( + Query entityQuery, + List partitionVars, + Element attributeElement, + Set requiredVars, + List partitionOrderBy, + Generator varGenerator) { + + Query result = preprocessQueryForPartitionWithoutSubSelect( + entityQuery, + partitionVars, + //attributeElement, + requiredVars, + true); + + partitionOrderBy = partitionOrderBy == null + ? Collections.emptyList() + : partitionOrderBy; + + // Allocate variables for each sort condition + List sortKeyVars = partitionOrderBy.stream() + .map(x -> varGenerator.next()) + .collect(Collectors.toList()); + + Element basePattern = result.getQueryPattern(); + + Query subSelect = new Query(); + subSelect.setQuerySelectType(); + subSelect.setQueryPattern(basePattern); + + for (Var partitionVar : partitionVars) { + subSelect.addResultVar(partitionVar); + subSelect.addGroupBy(partitionVar); + } + + + for (int i = 0; i < partitionOrderBy.size(); ++i) { + SortCondition sc = partitionOrderBy.get(i); + Var scv = sortKeyVars.get(i); + + // TODO The sort condition will contain an aggregate function + // that must be allocated on the query + //subSelect.allocAggregate(agg) + Expr rawExpr = sc.getExpression(); + Expr expr = ExprTransformer.transform(new ExprTransformAllocAggregate(subSelect), rawExpr); + subSelect.addResultVar(scv, expr); + + subSelect.addOrderBy(new SortCondition(expr, sc.getDirection())); + } + + + // Limit / offset have to be placed on the inner query + boolean hasSlice = result.hasLimit() || result.hasOffset(); + if (hasSlice) { + boolean useWrapper = false; + + if (useWrapper) { + Query sliceWrapper = new Query(); + sliceWrapper.setQuerySelectType(); + sliceWrapper.setQueryResultStar(true); + sliceWrapper.setQueryPattern(new ElementSubQuery(subSelect)); + result.setQueryPattern(new ElementSubQuery(sliceWrapper)); + subSelect = sliceWrapper; + } + + subSelect.setLimit(result.getLimit()); + subSelect.setOffset(result.getOffset()); + + result.setLimit(Query.NOLIMIT); + result.setOffset(Query.NOLIMIT); + + } + + + + + ElementGroup newPattern = ElementUtils.createElementGroup(new ElementSubQuery(subSelect)); + ElementUtils.copyElements(newPattern, attributeElement); + + // Update the query pattern + result.setQueryPattern(newPattern); + + + // Prepend the sort conditions + List partitionScs = new ArrayList<>(); + for (int i = 0; i < partitionOrderBy.size(); ++i) { + SortCondition sc = partitionOrderBy.get(i); + Var scv = sortKeyVars.get(i); + + partitionScs.add(new SortCondition(scv, sc.getDirection())); + } + prependToOrderBy(result, partitionScs); + + return result; + } + + + /** + * Return a SELECT query from the given query where + * - it is 
ensured that all partitionVars are part of the projection (if they aren't already)
+     * - distinct is applied in preparation for instantiating construct templates (where duplicates can be ignored)
+     * - if sortRowsByPartitionVars is true then result bindings are sorted by the primary key vars
+     *   so that bindings that belong together are consecutive
+     * - the corner case of a construct template without any variables is handled
+     *
+     * @param baseQuery
+     * @param partitionVars
+     * @param requiredVars The variables that need to be projected in the resulting query
+     * @param sortRowsByPartitionVars
+     * @return
+     */
+    public static Query preprocessQueryForPartitionWithoutSubSelect(
+            Query baseQuery,
+            List<Var> partitionVars,
+            // Element attributeElement, // the attribute element is assumed to be aligned with baseQuery at this point
+            Set<Var> requiredVars,
+            boolean sortRowsByPartitionVars) {
+
+        Query selectQuery = baseQuery.cloneQuery();
+        selectQuery.setQuerySelectType();
+        selectQuery.setQueryResultStar(false);
+
+        VarExprList project = selectQuery.getProject();
+
+        VarExprListUtils.addAbsentVars(project, partitionVars);
+        VarExprListUtils.addAbsentVars(project, requiredVars);
+
+        // Handle the corner case where no variables are requested
+        if (project.isEmpty()) {
+            // If the template is variable free then project the first variable of the query pattern
+            // If the query pattern is variable free then just use the result star
+            Set<Var> patternVars = SetUtils.asSet(PatternVars.vars(selectQuery.getQueryPattern()));
+            if (patternVars.isEmpty()) {
+                selectQuery.setQueryResultStar(true);
+            } else {
+                Var v = patternVars.iterator().next();
+                selectQuery.setQueryResultStar(false);
+                selectQuery.getProject().add(v);
+            }
+        }
+
+        selectQuery.setDistinct(true);
+
+        if (sortRowsByPartitionVars) {
+            List<SortCondition> newSortConditions = createSortConditionsFromVars(partitionVars, Query.ORDER_DEFAULT);
+            prependToOrderBy(selectQuery, newSortConditions);
+        }
+
+        return selectQuery;
+    }
+}
+
+// public static Flowable execConstructRooted(
+// SparqlQueryConnection conn,
+// RootedQuery rootedQuery,
+// Supplier graphSupplier,
+// ExprListEval evalFn) {
+//
+// ObjectQuery objectQuery = rootedQuery.getObjectQuery();
+//
+// Node root = rootedQuery.getRootNode();
+//
+// Query selectQuery = objectQuery.getRelation().toQuery();
+// Set requiredVars = getRequiredVars(objectQuery);
+//
+// List partitionVars;
+// Function keyToNode;
+//
+// if (root.isVariable()) {
+// Var rootVar = (Var)root;
+// partitionVars = Collections.singletonList(rootVar);
+// // pkExprs = new ExprList(new ExprVar(rootVar));
+// keyToNode = b -> b.get(rootVar);
+// } else if (root.isBlank()) {
+// // The root node must be mapped to ids
+// // TODO Currently the limitation is that the mapping must be a list of vars rather than arbitrary expressions
+// ExprList el = objectQuery.getIdMapping().get(root);
+// Objects.requireNonNull(el, "blank node as the root must be mapped to id-generating expressions");
+//
+// partitionVars = el.getListRaw().stream()
+//     .map(ExprVars::getVarsMentioned)
+//     .flatMap(Collection::stream)
+//     .distinct()
+//     .collect(Collectors.toList());
+//
+// keyToNode = b -> evalFn.eval(el, b);
+// } else {
+// // Case where the root node is a constant;
+// // unlikely to be useful but handled for completeness
+// partitionVars = Collections.emptyList();
+// keyToNode = b -> root;
+// }
+//
+// Query clone = preprocessQueryForPartition(selectQuery, partitionVars, requiredVars, true);
+//
+// Aggregator agg = createGraphAggregator(objectQuery, evalFn, graphSupplier);
+//
+// Flowable result = execConstructGrouped(conn::query, agg, clone, partitionVars)
+//     .map(e -> {
+//         Binding b = e.getKey();
+//         Graph g = e.getValue();
+//
+//         Node rootNode = keyToNode.apply(b);
+//         Model m = ModelFactory.createModelForGraph(g);
+//
+//         RDFNode r = ModelUtils.convertGraphNodeToRDFNode(rootNode, m);
+//         return r;
+//     });
+//
+// return result;
+// }
+
+//public static Flowable execConstructRooted(
+//SparqlQueryConnection conn,
+//RootedQuery rootedQuery) {
+//return execConstructRooted(conn, rootedQuery, GraphFactory::createDefaultGraph);
+//}
+//
+//public static Flowable execConstructRooted(
+//SparqlQueryConnection conn,
+//RootedQuery rootedQuery,
+//Supplier graphSupplier) {
+//return execConstructRooted(conn, rootedQuery, GraphFactory::createDefaultGraph, RootedQueryRx::evalToNode);
+//}
+//
+
+//public static FlowableTransformer, Entry> graphsFromPartitions(
+//    Template template,
+//    Map idMapping,
+//    ExprListEval exprListEval,
+//    Supplier graphSupplier) {
+//
+//    Aggregator graphAgg = createGraphAggregator(template, idMapping, exprListEval, graphSupplier);
+//
+//    return upstream ->
+//        upstream.map(keyAndTable -> {
+//            Accumulator acc = graphAgg.createAccumulator();
+//
+//            Table table = keyAndTable.getValue();
+//            table.rows().forEachRemaining(acc::accumulate);
+//
+//            Graph graph = acc.getValue();
+//
+//            return Maps.immutableEntry(keyAndTable.getKey(), graph);
+//        });
+//}
+
+//public static Flowable> execConstructGrouped(
+//    Function qeSupp,
+//    Aggregator aggregator,
+//    Query clone,
+//    List primaryKeyVars) {
+//
+//    Function grouper = SparqlRx.createGrouper(primaryKeyVars, false);
+//
+//    Flowable> result = SparqlRx
+//        // For future reference: If we get an empty result by using the query object, we probably have wrapped a variable with NodeValue.makeNode.
+//        .execSelectRaw(() -> qeSupp.apply(clone))
+//        .compose(aggregateConsecutiveItemsWithSameKey(grouper, aggregator));
+//
+//    return result;
+//}
+
+//public static Query appendToProject(Query query, List vars) {
+//    query.addProjectVars(vars);
+//}
+//
+//public static Query appendToProject(Query query, VarExprList vel) {
+//
+//}
+
+//Set trackedNodes = acc.getTrackedNodes(rootNode);
+//if (bindingToRootNodeInst != null) {
+//
+//    Iterator it = table.rows();
+//    while (it.hasNext()) {
+//        Binding binding = it.next();
+//        Node inst = bindingToRootNodeInst.apply(binding);
+//        rootNodes.add(inst);
+//    }
+//}
\ No newline at end of file
diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/AttributeGraphFragment.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/AttributeGraphFragment.java new file mode 100644 index 000000000..ac0528317 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/AttributeGraphFragment.java @@ -0,0 +1,69 @@
+package org.aksw.jena_sparql_api.rx.entity.model;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.aksw.jena_sparql_api.rx.EntityGraphFragment;
+import org.apache.jena.graph.Node;
+import org.apache.jena.query.Query;
+import org.apache.jena.sparql.core.Var;
+
+public class AttributeGraphFragment {
+    protected List<GraphPartitionJoin> mandatoryJoins;
+    protected List<GraphPartitionJoin> optionalJoins;
+
+    public AttributeGraphFragment() {
+        this(new ArrayList<>(), new ArrayList<>());
+    }
+
+    public AttributeGraphFragment(List<GraphPartitionJoin> mandatoryJoins, List<GraphPartitionJoin> optionalJoins) {
+        super();
+        this.mandatoryJoins = mandatoryJoins;
+        this.optionalJoins = optionalJoins;
+    }
+
+    public List<GraphPartitionJoin> getMandatoryJoins() {
+        return mandatoryJoins;
+    }
+
+    public List<GraphPartitionJoin> getOptionalJoins() {
+        return optionalJoins;
+    }
+
+    public void setMandatoryJoins(List<GraphPartitionJoin> mandatoryJoins) {
+        this.mandatoryJoins = mandatoryJoins;
+    }
+
+    public void setOptionalJoins(List<GraphPartitionJoin> optionalJoins) {
+        this.optionalJoins = optionalJoins;
+    }
+
+    /*
+     * Convenience functions
+     */
+
+    public AttributeGraphFragment addMandatoryJoin(Var entityVar, Query query) {
+        getMandatoryJoins()
+            .add(new GraphPartitionJoin(EntityGraphFragment.fromQuery(
+                    entityVar, query)));
+        return this;
+    }
+
+    public AttributeGraphFragment addOptionalJoin(Var entityVar, Query query) {
+        getOptionalJoins()
+            .add(new GraphPartitionJoin(EntityGraphFragment.fromQuery(
+                    entityVar, query)));
+        return this;
+    }
+
+    @Override
+    public String toString() {
+        String result =
+            optionalJoins.stream().map(item -> "OPTIONAL " + item).collect(Collectors.joining("\n"))
+            + "\n"
+            + mandatoryJoins.stream().map(item -> "" + item).collect(Collectors.joining("\n"));
+
+        return result;
+    }
+}
diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityQueryBasic.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityQueryBasic.java new file mode 100644 index 000000000..f0a1e5e8e --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityQueryBasic.java @@ -0,0 +1,125 @@
+package org.aksw.jena_sparql_api.rx.entity.model;
+
+import java.util.List;
+
+import 
org.aksw.jena_sparql_api.rx.EntityBaseQuery; +import org.aksw.jena_sparql_api.rx.EntityGraphFragment; +import org.apache.jena.graph.Node; +import org.apache.jena.query.Query; +import org.apache.jena.query.SortCondition; +import org.apache.jena.sparql.core.Var; + + +/** + * An extension (wrapper) of Jena's SPARQL {@link Query} model with the following features: + * + *
+ * <ul>
+ *   <li>Partitioning of result sets into sets of bindings that have common values for some of their variables.
+ * A construct template can be instantiated for each partition individually which leads to a sequence of
+ * graph fragments</li>
+ *
+ *   <li>A certain amount of control over blank node allocation by making their IDs dependent on a tuple of expressions
+ * over the projected columns. Within a single query execution the blank nodes whose
+ * corresponding expression tuple yields the same value
+ * should map to the same final blank node</li>
+ *
+ *   <li>Designation of a {@link Node} in the template or a variable of the WHERE pattern as the root node</li>
+ * </ul>
+ * + * + * @author raven + * + */ +public class EntityQueryBasic { + +// protected EntityTemplate entityTemplate; +// protected Query partitionSelector; +// protected List partitionVars; +// protected List sortConditions; + + protected EntityBaseQuery baseQuery; + + protected EntityGraphFragment attributeFragment; + protected EntityGraphFragment optionalAttributeFragment; + + public EntityBaseQuery getBaseQuery() { + return baseQuery; + } + + public void setBaseQuery(EntityBaseQuery baseQuery) { + this.baseQuery = baseQuery; + } + + public EntityGraphFragment getAttributeFragment() { + return attributeFragment; + } + + public void setAttributeFragment(EntityGraphFragment attributeFragment) { + this.attributeFragment = attributeFragment; + } + + public void setOptionalAttributeFragment(EntityGraphFragment optionalAttributeFragment) { + this.optionalAttributeFragment = optionalAttributeFragment; + } + + public EntityGraphFragment getOptionalAttributeFragment() { + return optionalAttributeFragment; + } + + @Override + public String toString() { + return baseQuery + "\n" + attributeFragment + "\n" + optionalAttributeFragment; + } + + /** + * Return the underlying SPARQL 1.1 query for which + * the extensions in this model are defined + * + * @return + */ +// public EntityTemplate getDirectGraphPartition() { +// return entityTemplate; +// } +// +// public void setDirectGraphPartition(EntityTemplate entityTemplate) { +// this.entityTemplate = entityTemplate; +// } +// +// /** +// * The select query that specifies the set of entities, +// * and their order +// * +// * The number and order of the projection variables must +// * match that of the graph partitions and vice versa +// * +// * @return +// */ +// public Query getPartitionSelectorQuery() { +// return partitionSelector; +// } +// +// void setPartitionSelectorQuery(Query query) { +// this.partitionSelector = query; +// } +// +// +// public EntityGraphFragment getAttributeFragment() { +// return attributeFragment; +// } +// +// public void setAttributeFragment(EntityGraphFragment attributeFragment) { +// this.attributeFragment = attributeFragment; +// } +// +// /** +// * The variables of the base select query by which to partition +// * +// * @return +// */ +// List getPartitionVars() { +// return this.partitionVars; +// } +// +// List getPartitionOrderBy() { +// return this.sortConditions; +// } + +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityQueryImpl.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityQueryImpl.java new file mode 100644 index 000000000..f0c2f1054 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityQueryImpl.java @@ -0,0 +1,119 @@ +package org.aksw.jena_sparql_api.rx.entity.model; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import org.aksw.jena_sparql_api.rx.EntityBaseQuery; +import org.aksw.jena_sparql_api.rx.EntityGraphFragment; +import org.aksw.jena_sparql_api.utils.Vars; +import org.apache.jena.graph.Node; +import org.apache.jena.query.Query; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.engine.binding.BindingFactory; +import org.apache.jena.sparql.syntax.Element; +import org.apache.jena.sparql.syntax.ElementData; + + +/** Basic implementation of {@link EntityQueryBasic} */ +public class EntityQueryImpl +{ + protected EntityBaseQuery baseQuery; + protected AttributeGraphFragment 
attributePart;
+
+    public EntityQueryImpl() {
+        this(null, new AttributeGraphFragment());
+    }
+
+    public EntityQueryImpl(EntityBaseQuery baseQuery, AttributeGraphFragment attributePart) {
+        super();
+        this.baseQuery = baseQuery;
+        this.attributePart = attributePart;
+    }
+
+    public EntityBaseQuery getBaseQuery() {
+        return baseQuery;
+    }
+
+    public void setBaseQuery(EntityBaseQuery baseQuery) {
+        this.baseQuery = baseQuery;
+    }
+
+    public AttributeGraphFragment getAttributePart() {
+        return attributePart;
+    }
+
+    public void setAttributePart(AttributeGraphFragment attributePart) {
+        this.attributePart = attributePart;
+    }
+
+    public List<GraphPartitionJoin> getMandatoryJoins() {
+        return attributePart.getMandatoryJoins();
+    }
+
+    public List<GraphPartitionJoin> getOptionalJoins() {
+        return attributePart.getOptionalJoins();
+    }
+
+    public void setOptionalJoins(List<GraphPartitionJoin> optionalJoins) {
+        this.attributePart.setOptionalJoins(optionalJoins);
+    }
+
+    public static EntityQueryImpl createEntityQuery(Var entityVar, Query standardQuery) {
+        EntityBaseQuery ebq = new EntityBaseQuery(
+                Collections.singletonList(entityVar),
+                new EntityTemplateImpl(), standardQuery);
+        EntityQueryImpl result = new EntityQueryImpl();
+        result.setBaseQuery(ebq);
+
+        return result;
+    }
+
+    public static Query createStandardQuery(Var entityVar, Node node) {
+        Query result = createStandardQuery(entityVar, Collections.singleton(node));
+        return result;
+    }
+
+    public static Query createStandardQuery(Var entityVar, Collection<Node> nodes) {
+        Query result = createStandardQuery(entityVar, new ElementData(
+                Collections.singletonList(entityVar),
+                nodes.stream().map(n -> BindingFactory.binding(entityVar, n)).collect(Collectors.toList())));
+
+        return result;
+    }
+
+    public static Query createStandardQuery(Var entityVar, Element element) {
+        Query query = new Query();
+        query.setQuerySelectType();
+        query.getProject().add(entityVar);
+        query.setQueryPattern(element);
+
+        return query;
+    }
+
+    /**
+     * A convenience function to create an entity query for a specific entity (denoted by the node)
+     *
+     * @param entityGraphFragment
+     * @param node
+     * @return
+     */
+    public static EntityQueryImpl createEntityQuery(EntityGraphFragment entityGraphFragment, Node node) {
+        Var entityVar = Vars.s;
+
+        Query standardQuery = createStandardQuery(entityVar, node);
+        EntityQueryImpl result = EntityQueryImpl.createEntityQuery(entityVar, standardQuery);
+
+        result.getMandatoryJoins().add(new GraphPartitionJoin(entityGraphFragment));
+
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        return attributePart + " " + baseQuery;
+    }
+}
diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityTemplate.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityTemplate.java new file mode 100644 index 000000000..c0ba58995 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityTemplate.java @@ -0,0 +1,64 @@
+package org.aksw.jena_sparql_api.rx.entity.model;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.Triple;
+import org.apache.jena.sparql.core.BasicPattern;
+import org.apache.jena.sparql.expr.ExprList;
+import org.apache.jena.sparql.graph.NodeTransform;
+import org.apache.jena.sparql.syntax.Template;
+
+public interface EntityTemplate {
+
+    EntityTemplate cloneTemplate();
+
+    Template getTemplate();
+    void setTemplate(Template template);
+
+    List<Node> getEntityNodes();
+//    void setEntityNode(Node rootVar);
+
+    Map<Node, ExprList> getBnodeIdMapping();
+
+//    default void setEntityNode(String rootVarName) {
+//        setEntityNode(Var.alloc(rootVarName));
+//    }
+
+    EntityTemplate applyNodeTransform(NodeTransform nodeTransform);
+
+    /**
+     * Combine the information from multiple EntityTemplates into a single one.
+     * This operation should only be performed after processing variable names
+     * such that no clashes occur.
+     *
+     * @param templates
+     * @return
+     */
+    static EntityTemplate merge(EntityTemplate ... templates) {
+        Set<Triple> triples = new LinkedHashSet<>();
+        Set<Node> entityNodes = new LinkedHashSet<>();
+        Map<Node, ExprList> bnodeIdMapping = new LinkedHashMap<>();
+
+        for (EntityTemplate template : templates) {
+            triples.addAll(template.getTemplate().getBGP().getList());
+            entityNodes.addAll(template.getEntityNodes());
+
+            // TODO Ensure there are no clashes
+            bnodeIdMapping.putAll(template.getBnodeIdMapping());
+        }
+
+        EntityTemplateImpl result = new EntityTemplateImpl(
+                new ArrayList<>(entityNodes),
+                new Template(BasicPattern.wrap(new ArrayList<>(triples))),
+                bnodeIdMapping);
+
+        return result;
+    }
+}
diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityTemplateImpl.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityTemplateImpl.java new file mode 100644 index 000000000..5694f5bd6 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/EntityTemplateImpl.java @@ -0,0 +1,119 @@
+package org.aksw.jena_sparql_api.rx.entity.model;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.jena.graph.Node;
+import org.apache.jena.sparql.core.BasicPattern;
+import org.apache.jena.sparql.expr.ExprList;
+import org.apache.jena.sparql.graph.NodeTransform;
+import org.apache.jena.sparql.graph.NodeTransformLib;
+import org.apache.jena.sparql.syntax.Template;
+
+/**
+ * In an entity query, the construct/entity part is based directly on
+ * the entity selector SELECT query.
+ *
+ * As such, it neither declares its own WHERE pattern nor partition variables,
+ * as they are based directly on the selector query.
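+ *
+ * A hedged construction sketch (names are illustrative; {@code RDFS} is
+ * {@code org.apache.jena.vocabulary.RDFS}):
+ * <pre>{@code
+ * Node entityNode = Var.alloc("s");
+ * EntityTemplate t = new EntityTemplateImpl(
+ *         Collections.singletonList(entityNode),
+ *         new Template(BasicPattern.wrap(Collections.singletonList(
+ *                 Triple.create(entityNode, RDFS.label.asNode(), Var.alloc("l"))))));
+ * }</pre>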
+ * + * + * @author raven + * + */ +public class EntityTemplateImpl + implements EntityTemplate +{ + protected List entityNodes; + protected Template template; + protected Map bnodeIdMapping; + + public static Map applyNodeTransformBnodeMap(NodeTransform nodeTransform, Map map) { + Map result = map.entrySet().stream() + .collect(Collectors.toMap( + e -> nodeTransform.apply(e.getKey()), + e -> NodeTransformLib.transform(nodeTransform, e.getValue()))); + return result; + } + + public EntityTemplateImpl() { + this(new ArrayList<>(), new Template(new BasicPattern())); + } + + + public EntityTemplateImpl(List entityNodes, Template template) { + this(entityNodes, template, new LinkedHashMap<>()); + } + + public EntityTemplateImpl(List entityNodes, Template template, Map bnodeIdMapping) { + super(); + this.entityNodes = entityNodes; + this.template = template; + this.bnodeIdMapping = bnodeIdMapping; + } + + public EntityTemplateImpl cloneTemplate() { + return new EntityTemplateImpl( + new ArrayList<>(entityNodes), + new Template(BasicPattern.wrap(new ArrayList<>(template.getTriples()))), + new LinkedHashMap<>(bnodeIdMapping)); + } + + /** + * Template to construct graphs directly from the given select + * query (avoids having to repeat the select query's pattern as a graph partition) + * + * @return + */ + public Template getTemplate() { + return template; + } + + @Override + public List getEntityNodes() { + return entityNodes; + } + +// @Override +// public void setEntityNode(Node entityNode) { +// this.entityNode = entityNode; +// } + + @Override + public Map getBnodeIdMapping() { + return bnodeIdMapping; + } + + @Override + public void setTemplate(Template template) { + this.template = template; + } + + public static List transformNodes(NodeTransform nodeTransform, List varList) { + List varList2 = new ArrayList<>(varList.size()) ; + for ( Node v : varList ) { + Node v2 = nodeTransform.apply(v) ; + varList2.add(v2) ; + } + return varList2 ; + } + + @Override + public EntityTemplate applyNodeTransform(NodeTransform nodeTransform) { + return new EntityTemplateImpl( + transformNodes(nodeTransform, entityNodes), + new Template(NodeTransformLib.transform(nodeTransform, template.getBGP())), + applyNodeTransformBnodeMap(nodeTransform, bnodeIdMapping) + ); + } + + + @Override + public String toString() { + return template.getGraphPattern() + ", entity nodes " + entityNodes + ", bnodeIdMapping " + bnodeIdMapping; + } + +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/ExprListEval.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/ExprListEval.java new file mode 100644 index 000000000..dd97fa825 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/ExprListEval.java @@ -0,0 +1,18 @@ +package org.aksw.jena_sparql_api.rx.entity.model; + +import org.apache.jena.graph.Node; +import org.apache.jena.sparql.engine.binding.Binding; +import org.apache.jena.sparql.expr.ExprList; + +/** + * A helper interface to abstract the concrete method by which + * a Node is computed from a list of expressions w.r.t. a Binding. 
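Since merge assumes clash-free variable names, a typical preparation step is a rename via applyNodeTransform. A small sketch, assuming an existing EntityTemplate named template and an invented prefixing scheme (NodeTransform is a single-method type, so a lambda suffices):

    import org.apache.jena.graph.Node;
    import org.apache.jena.sparql.core.Var;
    import org.apache.jena.sparql.graph.NodeTransform;

    // Prefix every variable with "lhs_" so this template cannot clash with another one;
    // non-variable nodes pass through unchanged.
    NodeTransform rename = (Node n) -> n.isVariable() ? Var.alloc("lhs_" + n.getName()) : n;
    EntityTemplate renamed = template.applyNodeTransform(rename);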
+ * + * This method is used to allocate nodes. + * + * @author raven + * + */ +public interface ExprListEval { + Node eval(ExprList el, Binding binding); +} \ No newline at end of file diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartition.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartition.java new file mode 100644 index 000000000..4bc22b0de --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartition.java @@ -0,0 +1,51 @@ +package org.aksw.jena_sparql_api.rx.entity.model; + +import java.util.List; + +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.graph.NodeTransform; +import org.apache.jena.sparql.syntax.Element; + +/** + * A combination of a where pattern with designated variables that act as keys + * and a graph pattern + * + * ENTITY ?v1 ... ?vn + * CONSTRUCT { ?v1 rdfs:label ?l } + * WHERE { ?v1 ... ?vn } + * + * @author raven + * + */ +public interface GraphPartition +{ + /** + * Graph partitions with a non-null fetch group name + * will be fetched using separate lookups rather than combining their graph patterns + * into the attribute part of the base query. + * + * Graph partitions in the same fetch group will be retrieved using a union. + * + * An example using virtuoso pragma style syntax would be: + * + * DEFINE :fetchGroup "foo" CONSTRUCT WHERE { ?x a ?t } PARTITION BY ?x + * DEFINE :fetchGroup "foo" CONSTRUCT WHERE { ?y :label ?l} PARTITION BY ?y + * + * effective pattern (with ?z the aligned name to create a join on ?x and ?y): + * { ?z a ?t . ?z :label ?l } + * + */ String getLazyFetchGroupName(); void setLazyFetchGroupName(String name); + + EntityTemplate getEntityTemplate(); + + Element getElement(); + void setElement(Element element); + + List getPartitionVars(); + + GraphPartition applyNodeTransform(NodeTransform nodeTransform); +} + diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartitionJoin.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartitionJoin.java new file mode 100644 index 000000000..5b9661c1c --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartitionJoin.java @@ -0,0 +1,99 @@ +package org.aksw.jena_sparql_api.rx.entity.model; + +import java.util.List; + +import org.aksw.jena_sparql_api.rx.EntityGraphFragment; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.graph.NodeTransform; + + +public class GraphPartitionJoin +{ + protected EntityGraphFragment entityGraphFragment; + + protected List parentJoinVars; // if null the join will occur on the parent's partition vars + protected String lazyFetchGroupName; + + protected List subJoins; + + public GraphPartitionJoin(EntityGraphFragment entityGraphFragment) { + this(entityGraphFragment, null, null, null); + } + + public GraphPartitionJoin(EntityGraphFragment entityGraphFragment, + List parentJoinVars, String lazyFetchGroup, List< GraphPartitionJoin> subJoins) { + super(); + this.entityGraphFragment = entityGraphFragment; + this.parentJoinVars = parentJoinVars; + this.lazyFetchGroupName = lazyFetchGroup; + this.subJoins = subJoins; +// this.isOptional = isOptional; + } + +// public static GraphPartitionJoin create(Query query, List partitionVars, Node entityNode) { +// return new GraphPartitionJoin( +// new EntityGraphFragment( +// partitionVars, +// new
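ExprListEval itself is a single-method interface, so an implementation can be given inline. A sketch (passing null for the FunctionEnv is a simplification for illustration only):

    import org.apache.jena.sparql.expr.NodeValue;

    // Evaluate the first expression of the list against the binding and use the
    // resulting node, e.g. as the key under which an entity is allocated.
    ExprListEval eval = (el, binding) -> {
        NodeValue nv = el.get(0).eval(binding, null); // null env: simplification
        return nv.asNode();
    };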
EntityTemplateImpl(Collections.singletonList(entityNode), query.getConstructTemplate()), +// query.getQueryPattern()), +// null); +// } + + public List getParentJoinVars() { + return parentJoinVars; + } + + public EntityGraphFragment getEntityGraphFragment() { + return entityGraphFragment; + } + + public GraphPartitionJoin applyNodeTransform(NodeTransform nodeTransform) { + + // FIXME We need to handle renaming of parent join vars + + +// List newJoins = new ArrayList<>(); +// for (GraphPartitionJoin subJoin : subJoins) { +// List vars = subJoin.getParentJoinVars(); +// List newVars = NodeTransformLib.transformVars(nodeTransform, vars); +// } + + + GraphPartitionJoin result = new GraphPartitionJoin( +// isOptional, + entityGraphFragment.applyNodeTransform(nodeTransform), + parentJoinVars, + lazyFetchGroupName, + subJoins); + + return result; + } + + public String getLazyFetchGroupName() { + return lazyFetchGroupName; + } + + + public void setLazyFetchGroupName(String name) { + this.lazyFetchGroupName = name; + } + + public List getSubJoins() { + return subJoins; + } + + @Override + public String toString() { + return "GraphPartitionJoin [entityGraphFragment=" + entityGraphFragment + ", parentJoinVars=" + parentJoinVars + + ", lazyFetchGroupName=" + lazyFetchGroupName + ", subJoins=" + subJoins + "]"; + } + +// public boolean isOptional() { +// return isOptional; +// } +// +// public void setOptional(boolean isOptional) { +// this.isOptional = isOptional; +// } +} + diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartitionWithEntities.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartitionWithEntities.java new file mode 100644 index 000000000..6d69f4f85 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/entity/model/GraphPartitionWithEntities.java @@ -0,0 +1,42 @@ +package org.aksw.jena_sparql_api.rx.entity.model; + +import java.util.Set; + +import org.apache.jena.graph.Graph; +import org.apache.jena.graph.Node; +import org.apache.jena.sparql.engine.binding.Binding; + +/** + * A 'result row' of a partitioned construct query: + * The key of the partition is a binding, + * the graph is an RDF graph + * and roots are a set of nodes in the graph that act + * as the requested starting points for traversal. 
+ * + * @author raven + * + */ +public class GraphPartitionWithEntities { + protected Binding binding; + protected Graph graph; + protected Set roots; + + public GraphPartitionWithEntities(Binding binding, Graph graph, Set roots) { + super(); + this.binding = binding; + this.graph = graph; + this.roots = roots; + } + + public Binding getBinding() { + return binding; + } + + public Graph getGraph() { + return graph; + } + + public Set getRoots() { + return roots; + } +} diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/op/OperatorLocalOrder.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/op/OperatorLocalOrder.java new file mode 100644 index 000000000..aa20b5142 --- /dev/null +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/op/OperatorLocalOrder.java @@ -0,0 +1,295 @@ +package org.aksw.jena_sparql_api.rx.op; + +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.NavigableMap; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiFunction; +import java.util.function.Function; + +import org.reactivestreams.Subscriber; +import org.reactivestreams.Subscription; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.reactivex.rxjava3.annotations.NonNull; +import io.reactivex.rxjava3.core.FlowableOperator; +import io.reactivex.rxjava3.core.FlowableSubscriber; +import io.reactivex.rxjava3.internal.subscriptions.SubscriptionHelper; + +public final class OperatorLocalOrder + implements FlowableOperator { + + private static final Logger logger = LoggerFactory.getLogger(OperatorLocalOrder.class); + + protected Function extractSeqId; + protected Function incrementSeqId; + + protected BiFunction distanceFn; + + protected S initialExpectedSeqId; + + + public OperatorLocalOrder( + S initialExpectedSeqId, + Function incrementSeqId, + BiFunction distanceFn, + Function extractSeqId) { + super(); + this.extractSeqId = extractSeqId; + this.incrementSeqId = incrementSeqId; + this.distanceFn = distanceFn; + this.initialExpectedSeqId = initialExpectedSeqId; + } + + + public class SubscriberImpl + implements FlowableSubscriber, Subscription + { + protected Subscriber downstream; + protected Subscription upstream; + + protected S expectedSeqId = initialExpectedSeqId; + protected boolean isComplete = false; + + protected NavigableMap seqIdToValue = new TreeMap<>((a, b) -> distanceFn.apply(a, b).intValue()); + + protected AtomicLong pending = new AtomicLong(); + + public SubscriberImpl(Subscriber downstream) + { + this.downstream = downstream; + } + + @Override + public void onSubscribe(Subscription s) { + if (upstream != null) { + s.cancel(); + } else { + upstream = s; + downstream.onSubscribe(this); + } + } + + public void onNext(T value) { +// if(delegate.isCancelled()) { +// throw new RuntimeException("Downstream cancelled"); +// } + + S seqId = extractSeqId.apply(value); + +// System.err.println("ENCOUNTERED CONTRIB " + seqId + " WITH QUEUE size " + seqIdToValue.keySet().size()); + // If complete, the seqId must not be higher than the latest seen one + if(isComplete) { + if(seqIdToValue.isEmpty()) { + downstream.onError(new RuntimeException("Sanity check failed: Call to onNext encountered after completion.")); + } + + + S highestSeqId = seqIdToValue.descendingKeySet().first(); + + if(distanceFn.apply(seqId, highestSeqId).intValue() > 0) { + downstream.onError(new RuntimeException("Sequence was marked as complete with id " + highestSeqId + " but a higher id was 
encountered " + seqId)); + } + } + + boolean checkForExistingKeys = true; + if(checkForExistingKeys) { + if(seqIdToValue.containsKey(seqId)) { + downstream.onError(new RuntimeException("Already seen an item with id " + seqId)); + } + } + + // If the distance is too great block the thread + //synchronized (this) { +// int dd; +// while((dd = distanceFn.apply(seqId, expectedSeqId).intValue()) > maxIdDistance) { +// System.err.println("DISTANCE FROM expected " + expectedSeqId + " TO contrib " + seqId + " IS " + dd + " GOING TO SLEEP " + Thread.currentThread()); + // +// try { +// //System.err.println("DISTANCE " + d + " TO GREAT - SLEEPING " + Thread.currentThread()); +// // this.wait(); +// Thread.sleep(100); +// break; +// } catch(InterruptedException e) { +// throw new RuntimeException(e); +// } + // +//// if(delegate.isCancelled()) { +//// throw new RuntimeException("Downstream cancelled"); +//// } +// } + // +// System.err.println("DISTANCE FROM expected " + expectedSeqId + " TO contrib " + seqId + " IS " + dd + " " + Thread.currentThread()); +// // } + + //synchronized(this) { + // Add item to the map + seqIdToValue.put(seqId, value); + + // Consume consecutive items from the map + Iterator> it = seqIdToValue.entrySet().iterator(); + while(it.hasNext() && pending.get() > 0) { +// if(delegate.isCancelled()) { +// throw new RuntimeException("Downstream cancelled"); +// } + + + Entry e = it.next(); + S s = e.getKey(); + T v = e.getValue(); + + int d = distanceFn.apply(s, expectedSeqId).intValue(); + if(d == 0) { + it.remove(); + pending.decrementAndGet(); + downstream.onNext(v); + expectedSeqId = incrementSeqId.apply(expectedSeqId); + // this.notifyAll(); + //System.out.println("expecting seq id " + expectedSeqId); + } else if(d < 0) { + // Skip values with a lower id + // TODO Add a flag to emit onError event + logger.warn("Should not happen: received id " + s + " which is lower than the expected id " + expectedSeqId); + it.remove(); + } else { // if d > 0 + // Wait for the next sequence id + logger.trace("Received id " + s + " but first need to wait for expected id " + expectedSeqId); + break; + } + } + + // If the completion mark was set and all items have been emitted, we are done + if(isComplete && seqIdToValue.isEmpty()) { + downstream.onComplete(); + } else { + // If there are pending items in the seqIdToValue queue we need + // to fetch more items from upstream + if(pending.get() > 0) { + upstream.request(1); + } + } + } + @Override + public void onError(Throwable t) { + downstream.onError(t); + } + + public void onComplete() { + isComplete = true; + + // If there are no more entries in the map, complete the downstreaem immediately + if(seqIdToValue.isEmpty()) { + downstream.onComplete(); + } + + // otherwise, the onNext method has to handle completion + } + + @Override + public void request(long n) { + if (SubscriptionHelper.validate(n)) { + pending.addAndGet(n); + upstream.request(1); + } + } + + @Override + public void cancel() { + upstream.cancel(); + } + } + + + public static OperatorLocalOrder forLong(long initiallyExpectedId, Function extractSeqId) { + return new OperatorLocalOrder(initiallyExpectedId, id -> Long.valueOf(id.longValue() + 1l), (a, b) -> a - b, extractSeqId); + } + + public static > OperatorLocalOrder wrap(S initiallyExpectedId, Function incrementSeqId, BiFunction distanceFn, Function extractSeqId) { + return new OperatorLocalOrder(initiallyExpectedId, incrementSeqId, distanceFn, extractSeqId); + } + + public static > FlowableOperator create( + S 
initialExpectedSeqId, + Function incrementSeqId, + BiFunction distanceFn, + Function extractSeqId) { + return new OperatorLocalOrder(initialExpectedSeqId, incrementSeqId, distanceFn, extractSeqId); + } + + @Override + public @NonNull Subscriber apply(@NonNull Subscriber downstream) + throws Throwable { + return new SubscriberImpl(downstream); + } + +// public static > FlowableTransformer transformer(S initiallyExpectedId, Function incrementSeqId, BiFunction distanceFn, Function extractSeqId) { +// +// return upstream -> { +// Flowable result = Flowable.create(new FlowableOnSubscribe() { +// +// @Override +// public void subscribe(FlowableEmitter e) throws Exception { +// FlowableTransformerLocalOrdering tmp = wrap( +// initiallyExpectedId, +// incrementSeqId, +// distanceFn, +// extractSeqId, +// e); +// +// upstream.subscribe(new FlowableSubscriber() { +// @Override +// public void onSubscribe(Subscription s) { +// e.setCancellable(s::cancel); +// s.request(Long.MAX_VALUE); +// } +// +// @Override +// public void onNext(T t) { +// tmp.onNext(t); +// } +// +// @Override +// public void onError(Throwable t) { +// tmp.onError(t); +// } +// +// @Override +// public void onComplete() { +// tmp.onComplete(); +// } +// +// }); +// +//// Disposable[] d = {null}; +//// d[0] = upstream.subscribe( +//// item -> { +//// if(e.isCancelled()) { +//// Disposable x = d[0]; +//// if(x != null) { +//// x.dispose(); +//// } +//// } else { +//// tmp.onNext(item); +//// } +//// }, +//// Exceptions::propagate, +//// // tmp::onError, +//// tmp::onComplete); +////// e.setCancellable(() -> { +////// System.out.println("CANCELLED"); +////// }); +//// e.setDisposable(d[0]); +//// // System.out.println("Done"); +//// // FIXME Something might be broken in the design, as +//// // upstream.subscribe(tmp) does NOT work +// +// +// } +// }, BackpressureStrategy.BUFFER); +// +// return result; +// }; +// } +} \ No newline at end of file diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/op/OperatorOrderedGroupBy.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/op/OperatorOrderedGroupBy.java index 0298c4400..a5978f8ff 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/op/OperatorOrderedGroupBy.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/op/OperatorOrderedGroupBy.java @@ -15,6 +15,7 @@ import io.reactivex.rxjava3.core.FlowableSubscriber; import io.reactivex.rxjava3.core.FlowableTransformer; import io.reactivex.rxjava3.internal.subscriptions.SubscriptionHelper; +import io.reactivex.rxjava3.internal.util.BackpressureHelper; /** * Ordered group by; somewhat similar to .toListWhile() but with dedicated support for @@ -115,27 +116,34 @@ public void onSubscribe(Subscription s) { @Override public void onNext(T item) { +// System.out.println("ONNEXT PENDING: " + pending.get() + " " + Thread.currentThread()); +// if (pending.get() <= 0) { +//// System.out.println("PENDING IS ZERO " + Thread.currentThread()); +//// System.out.println("PENDING IS " + pending.get()); +// throw new RuntimeException("Received item without any pending requests"); +// } currentKey = getGroupKey.apply(item); + boolean needMore = true; if(currentAcc == null) { // First time init priorKey = currentKey; currentAcc = accCtor.apply(currentKey); Objects.requireNonNull(currentAcc, "Got null for an accumulator"); - } else if(!groupKeyCompare.apply(priorKey, currentKey)) {//if(!Objects.equals(priorKey, currentKey)) { + } else if(!groupKeyCompare.apply(priorKey, currentKey)) { 
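For intuition on OperatorLocalOrder defined above: attached via Flowable.lift, it buffers items that arrive ahead of their sequence id and releases them strictly in order. A sketch along the lines of the unit test further below (data invented):

    import java.util.AbstractMap.SimpleEntry;
    import java.util.List;
    import java.util.Map.Entry;
    import io.reactivex.rxjava3.core.Flowable;

    Flowable<Entry<Long, String>> shuffled = Flowable.just(
            new SimpleEntry<>(2L, "b"),
            new SimpleEntry<>(1L, "a"),
            new SimpleEntry<>(3L, "c"));

    // forLong starts out expecting id 1 and increments by one per emission.
    List<String> ordered = shuffled
            .lift(OperatorLocalOrder.forLong(1L, Entry::getKey))
            .map(Entry::getValue)
            .toList()
            .blockingGet(); // ["a", "b", "c"]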
Entry e = Maps.immutableEntry(priorKey, currentAcc); // System.out.println("Passing on " + e); + needMore = pending.decrementAndGet() > 0; downstream.onNext(e); - pending.decrementAndGet(); currentAcc = accCtor.apply(currentKey); } accAdd.accept(currentAcc, item); priorKey = currentKey; - if(pending.get() > 0) { + if (needMore) { upstream.request(1); } @@ -149,6 +157,7 @@ public void onError(Throwable t) { @Override public void onComplete() { if(currentAcc != null) { + // System.out.println("EMITTED ITEM ON COMPLETE"); downstream.onNext(Maps.immutableEntry(currentKey, currentAcc)); } @@ -158,8 +167,11 @@ @Override public void request(long n) { if (SubscriptionHelper.validate(n)) { - pending.addAndGet(n); + BackpressureHelper.add(pending, n); +// pending.addAndGet(n); +// System.out.println("BEFORE REQUESTED " + n + " total pending " + pending.get() + " " + Thread.currentThread()); upstream.request(1); +// System.out.println("AFTER REQUESTED " + n + " total pending " + pending.get() + " " + Thread.currentThread()); } } diff --git a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/query_flow/RxUtils.java b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/query_flow/RxUtils.java index 22d717ccf..c8075d7e1 100644 --- a/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/query_flow/RxUtils.java +++ b/jena-sparql-api-rx/src/main/java/org/aksw/jena_sparql_api/rx/query_flow/RxUtils.java @@ -3,12 +3,16 @@ import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; +import java.util.function.Predicate; import org.reactivestreams.Subscription; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import io.reactivex.rxjava3.core.BackpressureStrategy; import io.reactivex.rxjava3.core.Flowable; @@ -24,11 +28,37 @@ import io.reactivex.rxjava3.internal.queue.SpscArrayQueue; public class RxUtils { + private static final Logger logger = LoggerFactory.getLogger(RxUtils.class); + + /** + * If something goes wrong when running the wrapped action + * then log an error and return an empty maybe + * + * @param action A callable encapsulating some action + * @return A maybe with the action's return value or empty + */ + public static Maybe safeMaybe(Callable action) { + Maybe result; + try { + T value = action.call(); + result = Maybe.just(value); + } catch (Exception e) { + logger.warn("An exception occurred; trying to continue", e); + result = Maybe.empty(); + } + return result; + } + /** * A 'poison' is an object that serves as an end marker on blocking queues */ public static final Object POISON = new Object(); + @SuppressWarnings("unchecked") + public static T poison() { + return (T)POISON; + } + public static Map nameMap = new ConcurrentHashMap<>(); public static FlowableTransformer counter(String name, long interval) { @@ -225,12 +255,12 @@ public static FlowableTransformer> queueProducer(int cap } - public static Flowable fromBlockingQueue(BlockingQueue queue) { + public static Flowable fromBlockingQueue(BlockingQueue queue, Predicate isPoison) { return Flowable.generate( () -> queue, (q, e) -> { T item = q.take(); - if(item == POISON) { + if (isPoison.test(item)) { e.onComplete(); } else { e.onNext(item); @@ -358,9 +388,8 @@ public void subscribe(FlowableEmitter emitter) throws
Exception { */ public static void consume(Flowable flowable) { Flowable tmp = flowable - .flatMapMaybe(batch -> { - return Maybe.empty(); - }) + //.mapOptional(x -> Optional.empty()) + .concatMapMaybe(x -> Maybe.empty()) .onErrorReturn(t -> t); Throwable e = tmp.singleElement().blockingGet(); diff --git a/jena-sparql-api-rx/src/test/java/org/aksw/jena_sparql_api/rx/op/TestOperatorLocalOrder.java b/jena-sparql-api-rx/src/test/java/org/aksw/jena_sparql_api/rx/op/TestOperatorLocalOrder.java new file mode 100644 index 000000000..759178fdd --- /dev/null +++ b/jena-sparql-api-rx/src/test/java/org/aksw/jena_sparql_api/rx/op/TestOperatorLocalOrder.java @@ -0,0 +1,41 @@ +package org.aksw.jena_sparql_api.rx.op; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Random; + +import org.junit.Assert; +import org.junit.Test; + +import io.reactivex.rxjava3.core.Flowable; + +public class TestOperatorLocalOrder { + @Test + public void test() { + Map map = new LinkedHashMap<>(); + map.put(1l, "a"); + map.put(2l, "b"); + map.put(3l, "c"); + map.put(4l, "d"); + map.put(5l, "e"); + + List expected = new ArrayList<>(map.values()); + + Random rand = new Random(0); + List> list = new ArrayList<>(map.entrySet()); + + Collections.shuffle(list, rand); + + List actual = Flowable.fromIterable(list) + .lift(OperatorLocalOrder.create(1l, i -> i + 1, (a, b) -> a - b, Entry::getKey)) + .map(Entry::getValue) + .toList() + .blockingGet(); + + Assert.assertEquals(expected, actual); + } +} diff --git a/jena-sparql-api-server-standalone/pom.xml b/jena-sparql-api-server-standalone/pom.xml index 3bed47977..c27862454 100644 --- a/jena-sparql-api-server-standalone/pom.xml +++ b/jena-sparql-api-server-standalone/pom.xml @@ -13,7 +13,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-server-utils/pom.xml b/jena-sparql-api-server-utils/pom.xml index fe5016000..a6a703106 100644 --- a/jena-sparql-api-server-utils/pom.xml +++ b/jena-sparql-api-server-utils/pom.xml @@ -11,7 +11,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-service-description/pom.xml b/jena-sparql-api-service-description/pom.xml index a50f060ee..6163c8274 100644 --- a/jena-sparql-api-service-description/pom.xml +++ b/jena-sparql-api-service-description/pom.xml @@ -10,7 +10,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-sparql-ext/pom.xml b/jena-sparql-api-sparql-ext/pom.xml index dcb070bca..f9899e716 100644 --- a/jena-sparql-api-sparql-ext/pom.xml +++ b/jena-sparql-api-sparql-ext/pom.xml @@ -13,7 +13,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-sparql-ext/src/main/java/org/aksw/jena_sparql_api/sparql/ext/fs/OpExecutorServiceOrFile.java b/jena-sparql-api-sparql-ext/src/main/java/org/aksw/jena_sparql_api/sparql/ext/fs/OpExecutorServiceOrFile.java index f695f7aee..b2bb9270a 100644 --- a/jena-sparql-api-sparql-ext/src/main/java/org/aksw/jena_sparql_api/sparql/ext/fs/OpExecutorServiceOrFile.java +++ b/jena-sparql-api-sparql-ext/src/main/java/org/aksw/jena_sparql_api/sparql/ext/fs/OpExecutorServiceOrFile.java @@ -8,12 +8,12 @@ public class OpExecutorServiceOrFile extends OpExecutor { - protected OpExecutorServiceOrFile(ExecutionContext execCxt) { + public OpExecutorServiceOrFile(ExecutionContext execCxt) { 
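safeMaybe from RxUtils above composes directly with concatMapMaybe: failing elements are logged and dropped instead of terminating the stream. A hypothetical sketch:

    import java.util.List;
    import io.reactivex.rxjava3.core.Flowable;

    // "two" cannot be parsed; safeMaybe logs the exception and yields Maybe.empty(),
    // so the flow simply continues with the remaining items.
    List<Integer> parsed = Flowable.just("1", "two", "3")
            .concatMapMaybe(s -> RxUtils.safeMaybe(() -> Integer.parseInt(s)))
            .toList()
            .blockingGet(); // [1, 3]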
super(execCxt); } @Override - protected QueryIterator execute(OpService opService, QueryIterator input) { + public QueryIterator execute(OpService opService, QueryIterator input) { return new QueryIterServiceOrFile(input, opService, execCxt); } } diff --git a/jena-sparql-api-sparql-ext/src/main/java/org/aksw/jena_sparql_api/sparql/ext/fs/QueryIterServiceOrFile.java b/jena-sparql-api-sparql-ext/src/main/java/org/aksw/jena_sparql_api/sparql/ext/fs/QueryIterServiceOrFile.java index 942108e5c..de2b61c34 100644 --- a/jena-sparql-api-sparql-ext/src/main/java/org/aksw/jena_sparql_api/sparql/ext/fs/QueryIterServiceOrFile.java +++ b/jena-sparql-api-sparql-ext/src/main/java/org/aksw/jena_sparql_api/sparql/ext/fs/QueryIterServiceOrFile.java @@ -13,6 +13,8 @@ import java.util.concurrent.TimeUnit; import org.aksw.jena_sparql_api.io.binseach.BinarySearchOnSortedFile; +import org.aksw.jena_sparql_api.io.binseach.BinarySearcher; +import org.aksw.jena_sparql_api.io.binseach.BlockSources; import org.aksw.jena_sparql_api.io.binseach.GraphFromPrefixMatcher; import org.aksw.jena_sparql_api.io.binseach.GraphFromSubjectCache; import org.aksw.jena_sparql_api.rx.GraphOpsRx; @@ -96,9 +98,10 @@ public static Entry> toPathSpec(Node node) { URI effectiveUri = new URI(uriStr.replaceAll("\\?.*", "")); path = Paths.get(effectiveUri); - boolean fileExists = Files.exists(path); +// boolean fileExists = Files.exists(path); - result = fileExists ? Maps.immutableEntry(path, params) : null; + // result = fileExists ? Maps.immutableEntry(path, params) : null; + return Maps.immutableEntry(path, params); } catch (URISyntaxException e) { //throw new RuntimeException(e); // Nothing to do; we simply return null if we fail @@ -141,13 +144,18 @@ protected QueryIterator nextStagePath(Binding outerBinding, Path path, Map bindingFlow = Flowable.generate(() -> { - Graph graph = new GraphFromPrefixMatcher(BinarySearchOnSortedFile.create(path)); + BinarySearcher binarySearcher = path.getFileName().toString().toLowerCase().endsWith(".bz2") + ?
BlockSources.createBinarySearcherBz2(path) + : BlockSources.createBinarySearcherText(path); + + Graph graph = new GraphFromPrefixMatcher(binarySearcher); GraphFromSubjectCache subjectCacheGraph = new GraphFromSubjectCache(graph); Model model = ModelFactory.createModelForGraph(subjectCacheGraph); QueryExecution qe = QueryExecutionFactory.create(query, model); diff --git a/jena-sparql-api-stmt/pom.xml b/jena-sparql-api-stmt/pom.xml index bdcd0045d..2878f4f46 100644 --- a/jena-sparql-api-stmt/pom.xml +++ b/jena-sparql-api-stmt/pom.xml @@ -13,7 +13,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-udf/pom.xml b/jena-sparql-api-udf/pom.xml index 1a3209ccb..e44f4acb7 100644 --- a/jena-sparql-api-udf/pom.xml +++ b/jena-sparql-api-udf/pom.xml @@ -10,7 +10,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-udf/src/main/java/org/aksw/jena_sparql_api/user_defined_function/UserDefinedFunctionResource.java b/jena-sparql-api-udf/src/main/java/org/aksw/jena_sparql_api/user_defined_function/UserDefinedFunctionResource.java index 1d392097a..5c378be37 100644 --- a/jena-sparql-api-udf/src/main/java/org/aksw/jena_sparql_api/user_defined_function/UserDefinedFunctionResource.java +++ b/jena-sparql-api-udf/src/main/java/org/aksw/jena_sparql_api/user_defined_function/UserDefinedFunctionResource.java @@ -8,21 +8,21 @@ import org.apache.jena.rdf.model.Resource; public interface UserDefinedFunctionResource - extends Resource + extends Resource { - /** - * Get a simple definition of the function in form of a list of strings. - * The first item is the SPARQL expression string whereas the remaining elements are the parameter - * variable names. - * - * @return - */ - @Iri("http://ns.aksw.org/jena/udf/simpleDefinition") - List getSimpleDefinition(); + /** + * Get a simple definition of the function in form of a list of strings. + * The first item is the SPARQL expression string whereas the remaining elements are the parameter + * variable names. 
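To make the list layout described above concrete: given a UserDefinedFunctionResource fn, a hypothetical udf:simpleDefinition value of ("?x + ?y" "x" "y") would be unpacked like this:

    import java.util.List;

    List<String> def = fn.getSimpleDefinition();
    String exprString = def.get(0);                       // "?x + ?y"
    List<String> paramNames = def.subList(1, def.size()); // ["x", "y"]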
+ * + * @return + */ + @Iri("http://ns.aksw.org/jena/udf/simpleDefinition") + List getSimpleDefinition(); - @Iri("http://ns.aksw.org/jena/udf/definition") - Set getDefinitions(); + @Iri("http://ns.aksw.org/jena/udf/definition") + Set getDefinitions(); // public default UserDefinedFunctionDefinition toJena() { // UserDefinedFunctionDefinition result = toJena(this); diff --git a/jena-sparql-api-udf/src/main/java/org/aksw/jena_sparql_api/user_defined_function/plugin/JenaPluginUdf.java b/jena-sparql-api-udf/src/main/java/org/aksw/jena_sparql_api/user_defined_function/plugin/JenaPluginUdf.java index 5e97d404c..aec3d3bf0 100644 --- a/jena-sparql-api-udf/src/main/java/org/aksw/jena_sparql_api/user_defined_function/plugin/JenaPluginUdf.java +++ b/jena-sparql-api-udf/src/main/java/org/aksw/jena_sparql_api/user_defined_function/plugin/JenaPluginUdf.java @@ -2,26 +2,37 @@ import org.aksw.jena_sparql_api.mapper.proxy.JenaPluginUtils; +import org.aksw.jena_sparql_api.user_defined_function.InverseDefinition; +import org.aksw.jena_sparql_api.user_defined_function.PrefixDefinition; +import org.aksw.jena_sparql_api.user_defined_function.PrefixSet; +import org.aksw.jena_sparql_api.user_defined_function.UdfDefinition; +import org.aksw.jena_sparql_api.user_defined_function.UdpfDefinition; import org.aksw.jena_sparql_api.user_defined_function.UserDefinedFunctionResource; import org.apache.jena.enhanced.Personality; import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.sys.JenaSubsystemLifecycle; public class JenaPluginUdf - implements JenaSubsystemLifecycle + implements JenaSubsystemLifecycle { - public void start() { - init(); - } + public void start() { + init(); + } - @Override - public void stop() { - } + @Override + public void stop() { + } - public static void init() { - JenaPluginUtils.scan(UserDefinedFunctionResource.class); - } - - public static void init(Personality p) { - } + public static void init() { + JenaPluginUtils.registerResourceClasses( + UserDefinedFunctionResource.class, + InverseDefinition.class, + PrefixDefinition.class, + PrefixSet.class, + UdfDefinition.class + ); + } + + public static void init(Personality p) { + } } diff --git a/jena-sparql-api-update/pom.xml b/jena-sparql-api-update/pom.xml index 13bb920d7..5929a4326 100644 --- a/jena-sparql-api-update/pom.xml +++ b/jena-sparql-api-update/pom.xml @@ -10,7 +10,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-utils/pom.xml b/jena-sparql-api-utils/pom.xml index d55cf04b2..bd2ce480d 100644 --- a/jena-sparql-api-utils/pom.xml +++ b/jena-sparql-api-utils/pom.xml @@ -12,7 +12,7 @@ org.aksw.jena-sparql-api jena-sparql-api-parent - 3.16.0-1 + 3.16.0-2 diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/backports/syntaxtransform/ExprTransformNodeElement.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/backports/syntaxtransform/ExprTransformNodeElement.java index c1cb0dcbc..b784ecb79 100644 --- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/backports/syntaxtransform/ExprTransformNodeElement.java +++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/backports/syntaxtransform/ExprTransformNodeElement.java @@ -37,9 +37,10 @@ import org.apache.jena.sparql.syntax.syntaxtransform.ElementTransform; import org.apache.jena.sparql.syntax.syntaxtransform.ElementTransformer; -// Copied version from jena 3.11.0 and added missing transformation for aggregators +// Copied version from jena 3.11.0 and added missing 
transformation for aggregators // ~ Claus, 2019 Jun 4 - +// Added handler for case when node transform yields a variable to prevent wrapping as NodeValue +// ~ Claus, 2020 Dec 2 /** * Special version of ExprTransform for applying a node transform on syntax * (Elements) only @@ -53,8 +54,8 @@ public class ExprTransformNodeElement extends ExprTransformCopy { public ExprTransformNodeElement(NodeTransform nodeTransform, ElementTransform eltrans) { this(nodeTransform, eltrans, null, null) ; } - - public ExprTransformNodeElement(NodeTransform nodeTransform, ElementTransform eltrans, + + public ExprTransformNodeElement(NodeTransform nodeTransform, ElementTransform eltrans, ElementVisitor beforeVisitor, ElementVisitor afterVisitor) { this.nodeTransform = nodeTransform ; this.elementTransform = eltrans ; @@ -79,7 +80,8 @@ public Expr transform(NodeValue nv) { Node n = nodeTransform.apply(nv.asNode()) ; if ( n == nv.asNode() ) return nv ; - return NodeValue.makeNode(n) ; + + return n.isVariable() ? new ExprVar((Var)n) : NodeValue.makeNode(n) ; } @Override @@ -97,10 +99,10 @@ public Expr transform(ExprFunctionOp funcOp, ExprList args, Op opArg) { throw new InternalErrorException("Unknown ExprFunctionOp: " + funcOp.getFunctionSymbol()) ; } } - + @Override public Expr transform(ExprAggregator eAgg) { - Expr result = eAgg.applyNodeTransform(nodeTransform); - return result; + Expr result = eAgg.applyNodeTransform(nodeTransform); + return result; } } diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/backports/syntaxtransform/QueryTransformOps.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/backports/syntaxtransform/QueryTransformOps.java index bb25ffe8a..293a74092 100644 --- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/backports/syntaxtransform/QueryTransformOps.java +++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/backports/syntaxtransform/QueryTransformOps.java @@ -42,7 +42,7 @@ import org.apache.jena.sparql.syntax.syntaxtransform.ElementTransform; import org.apache.jena.sparql.syntax.syntaxtransform.ElementTransformSubst; -/* MODIFIED VERSION WITH FIX FOR PROJECTIONS AND AGGREGATIONS (VarExprList) +/* MODIFIED VERSION WITH FIX FOR PROJECTIONS AND AGGREGATIONS (VarExprList) /** Support for transformation of query abstract syntax. */ @@ -72,14 +72,14 @@ public static Query transform(Query query, Map substitution // q2.setQueryPattern(el2) ; // return q2 ; // } - + /** Transform a query using {@link ElementTransform} and {@link ExprTransform}. * It is the responsibility of these transforms to transform to a legal SPARQL query. - */ + */ public static Query transform(Query query, ElementTransform transform, ExprTransform exprTransform) { Query q2 = QueryTransformOps.shallowCopy(query); - + // "Shallow copy with transform." 
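The NodeValue change noted above matters whenever a node transform maps a constant to a variable. A sketch of the behaviour (IRI and variable name invented):

    import org.apache.jena.graph.Node;
    import org.apache.jena.graph.NodeFactory;
    import org.apache.jena.sparql.core.Var;
    import org.apache.jena.sparql.graph.NodeTransform;

    Node placeholder = NodeFactory.createURI("http://example.org/placeholder");
    NodeTransform toVar = n -> placeholder.equals(n) ? Var.alloc("x") : n;
    // Previously, transforming an expression containing the placeholder wrapped the
    // result as NodeValue.makeNode(?x), i.e. a constant that merely prints like a
    // variable; with the fix it becomes an ExprVar, so later substitution and
    // evaluation treat it as a real variable.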
transformVarExprList(q2.getProject(), exprTransform); transformVarExprList(q2.getGroupBy(), exprTransform); @@ -91,28 +91,28 @@ public static Query transform(Query query, ElementTransform transform, ExprTrans // if ( q2.hasHaving() ) {} // if ( q2.hasAggregators() ) {} if(q2.hasAggregators()) { - List eas = q2.getAggregators(); - for(int i = 0; i < eas.size(); ++i) { - ExprAggregator before = eas.get(i); - ExprAggregator after = (ExprAggregator)before.apply(exprTransform); - eas.set(i, after); - } + List eas = q2.getAggregators(); + for(int i = 0; i < eas.size(); ++i) { + ExprAggregator before = eas.get(i); + ExprAggregator after = (ExprAggregator)before.apply(exprTransform); + eas.set(i, after); + } } //transformExprAggregatorList(q2.getAggregators(), exprTransform); - - + + Element el = q2.getQueryPattern(); - + // Pattern can be null, such as for DESCRIBE queries if(el != null) { - Element el2 = ElementTransformer.transform(el, transform, exprTransform); - // Top level is always a group. - if (!(el2 instanceof ElementGroup)) { - ElementGroup eg = new ElementGroup(); - eg.addElement(el2); - el2 = eg; - } - q2.setQueryPattern(el2); + Element el2 = ElementTransformer.transform(el, transform, exprTransform); + // Top level is always a group. + if (!(el2 instanceof ElementGroup)) { + ElementGroup eg = new ElementGroup(); + eg.addElement(el2); + el2 = eg; + } + q2.setQueryPattern(el2); } return q2; } @@ -171,7 +171,7 @@ public void startVisit(Query query) { DatasetDescription desc = query.getDatasetDescription() ; for (String x : desc.getDefaultGraphURIs()) newQuery.addGraphURI(x) ; - for (String x : desc.getDefaultGraphURIs()) + for (String x : desc.getNamedGraphURIs()) newQuery.addNamedGraphURI(x) ; } @@ -279,11 +279,11 @@ public void visitValues(Query query) { @Override public void finishVisit(Query query) {} - @Override - public void visitJsonResultForm(Query arg0) { - // TODO Auto-generated method stub - - } + @Override + public void visitJsonResultForm(Query arg0) { + // TODO Auto-generated method stub + + } } public static Query shallowCopy(Query query) { diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/syntax/QueryGenerationUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/syntax/QueryGenerationUtils.java index 11c3b0ec7..76695b3ea 100644 --- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/syntax/QueryGenerationUtils.java +++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/syntax/QueryGenerationUtils.java @@ -197,6 +197,9 @@ public static Entry createQueryCountPartition(Query query, Collectio Query clone = query.cloneQuery(); if(clone.isConstructType()) { + // Try to count the distinct number of template instantiations - i.e. + // the number of distinct bindings involving the variables + // mentioned in the template Template template = clone.getConstructTemplate(); Set vars = partitionVars == null ? 
QuadPatternUtils.getVarsMentioned(template.getQuads()) @@ -204,15 +207,18 @@ public static Entry createQueryCountPartition(Query query, Collectio clone.setQuerySelectType(); - // TODO Vars may be empty, in case we deal with a partitioned query +// System.out.println("Clone: " + clone); if(vars.isEmpty()) { //query.setQueryResultStar(true); + // TODO The distinct number of template instantiations if there is no var in a template is + // at most 1 throw new RuntimeException("Variables required for counting"); } else { clone.setQueryResultStar(false); clone.addProjectVars(vars); clone.setDistinct(true); } +// System.out.println("Clone2: " + clone); } else { // TODO We need to check whether the partition variables are mapped to expressions in the projection Set allowedVars = partitionVars == null @@ -803,17 +809,16 @@ public static Query createQueryCountCore(Var resultVar, Query rawQuery, Long ite // || query.isDistinct() || query.isReduced(); - if(itemLimit != null) { - long queryLimit = query.getLimit(); - long effectiveItemLimit = queryLimit == Query.NOLIMIT - ? itemLimit - : Math.min(queryLimit, itemLimit); + long tmpItemLimit = itemLimit == null ? Query.NOLIMIT : itemLimit; + long queryLimit = query.getLimit(); + long effectiveLimit = queryLimit == Query.NOLIMIT + ? tmpItemLimit + : tmpItemLimit == Query.NOLIMIT + ? queryLimit + : Math.min(queryLimit, itemLimit); -// query.setDistinct(false); - query.setLimit(effectiveItemLimit); - -// query = QueryGenerationUtils.wrapAsSubQuery(query); -// query.setDistinct(isDistinct); + if(effectiveLimit != Query.NOLIMIT) { + query.setLimit(effectiveLimit); needsWrapping = true; } diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/util/sparql/syntax/path/PathUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/util/sparql/syntax/path/PathUtils.java index e9d151eb8..f2a17f9fb 100644 --- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/util/sparql/syntax/path/PathUtils.java +++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/util/sparql/syntax/path/PathUtils.java @@ -10,78 +10,85 @@ import org.apache.jena.sparql.path.P_ReverseLink; import org.apache.jena.sparql.path.P_Seq; import org.apache.jena.sparql.path.Path; +import org.apache.jena.sparql.path.PathFactory; import org.apache.jena.sparql.util.NodeUtils; import com.google.common.collect.ComparisonChain; import com.google.common.collect.Streams; public class PathUtils { - - public static final Path nullPath = new P_Link(org.aksw.jena_sparql_api.utils.NodeUtils.nullUriNode); - - public static boolean isNull(Path path) { - boolean result = nullPath.equals(path); - return result; - + + public static final Path nullPath = new P_Link(org.aksw.jena_sparql_api.utils.NodeUtils.nullUriNode); + + public static boolean isNull(Path path) { + boolean result = nullPath.equals(path); + return result; + // boolean result = ExprEvalValueSet.tryCastAs(P_Path0.class, path) // .filter(p -> NULL.equals(p)) // //.filter(p -> Node.NULL.equals(p.getNode())) // .isPresent(); // return result; - } - - public static Path foldNulls(Path path) { - Path result = PathTransformer.transform(path, new PathTransformerNullFold()); - return result; - } - - public static Path canonicalizeReverseLinks(Path path) { - Path result = PathTransformer.transform(path, new PathTransformCanonicalizeReverseLinks()); - return result; - } - - public static Path toSparqlPath(List steps) { - return ExprUtils.opifyBalanced(steps, (a, b) -> new P_Seq(a, b)); - //return 
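The limit handling in createQueryCountCore above amounts to a three-way merge of the query's own LIMIT and the externally imposed item limit; an equivalent standalone restatement (method name invented; the original's Math.min(queryLimit, itemLimit) is equivalent because that branch implies itemLimit != null):

    import org.apache.jena.query.Query;

    static long effectiveLimit(long queryLimit, Long itemLimit) {
        long limit = itemLimit == null ? Query.NOLIMIT : itemLimit;
        return queryLimit == Query.NOLIMIT ? limit
                : limit == Query.NOLIMIT ? queryLimit
                : Math.min(queryLimit, limit);
    }
    // effectiveLimit(Query.NOLIMIT, null) -> NOLIMIT (no wrapping required)
    // effectiveLimit(Query.NOLIMIT, 100L) -> 100
    // effectiveLimit(50L, 100L)           -> 50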
ExprUtils.opifyBalanced(steps, P_Seq::new); - } - - public static P_Path0 createStep(String predicate, boolean isFwd) { - return createStep(NodeFactory.createURI(predicate), isFwd); - } - - public static P_Path0 createStep(Node predicate, boolean isFwd) { - P_Path0 result = isFwd ? new P_Link(predicate) : new P_ReverseLink(predicate); - return result; - } - - public static List toList(Path path) { - Path tmp = canonicalizeReverseLinks(path); - - PathVisitorToList visitor = new PathVisitorToList(); - tmp.visit(visitor); - List result = visitor.getResult(); - return result; - } - - public static int countForwardLinks(Iterable paths) { - int result = (int)Streams.stream(paths) - .filter(p -> p instanceof P_Path0 ? ((P_Path0)p).isForward() : false) - .count(); - return result; - } - - public static int countReverseLinks(Iterable paths) { - int result = (int)Streams.stream(paths) - .filter(p -> p instanceof P_Path0 ? !((P_Path0)p).isForward() : false) - .count(); - return result; - } - + } + + public static Path foldNulls(Path path) { + Path result = PathTransformer.transform(path, new PathTransformerNullFold()); + return result; + } + + public static Path canonicalizeReverseLinks(Path path) { + Path result = PathTransformer.transform(path, new PathTransformCanonicalizeReverseLinks()); + return result; + } + + public static Path toSparqlPath(List steps) { + return ExprUtils.opifyBalanced(steps, (a, b) -> new P_Seq(a, b)); + //return ExprUtils.opifyBalanced(steps, P_Seq::new); + } + + public static P_Path0 createStep(String predicate, boolean isFwd) { + return createStep(NodeFactory.createURI(predicate), isFwd); + } + + public static P_Path0 createStep(Node predicate, boolean isFwd) { + P_Path0 result = isFwd ? new P_Link(predicate) : new P_ReverseLink(predicate); + return result; + } + + + public static Path create(Path path, boolean isFwd) { + Path result = isFwd ? path : PathFactory.pathInverse(path); + return result; + } + + public static List toList(Path path) { + Path tmp = canonicalizeReverseLinks(path); + + PathVisitorToList visitor = new PathVisitorToList(); + tmp.visit(visitor); + List result = visitor.getResult(); + return result; + } + + public static int countForwardLinks(Iterable paths) { + int result = (int)Streams.stream(paths) + .filter(p -> p instanceof P_Path0 ? ((P_Path0)p).isForward() : false) + .count(); + return result; + } + + public static int countReverseLinks(Iterable paths) { + int result = (int)Streams.stream(paths) + .filter(p -> p instanceof P_Path0 ? 
!((P_Path0)p).isForward() : false) + .count(); + return result; + } + public static int compareStep(P_Path0 a, P_Path0 b) { - int result = ComparisonChain.start() - .compareTrueFirst(a.isForward(), b.isForward()) - .compare(a.getNode(), b.getNode(), NodeUtils::compareRDFTerms) - .result(); - return result; + int result = ComparisonChain.start() + .compareTrueFirst(a.isForward(), b.isForward()) + .compare(a.getNode(), b.getNode(), NodeUtils::compareRDFTerms) + .result(); + return result; } } diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ElementUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ElementUtils.java index e01b9c4da..eaa8e2c15 100644 --- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ElementUtils.java +++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ElementUtils.java @@ -58,65 +58,65 @@ public class ElementUtils { // // return result; // } - - - public static Element applyOpTransform(Element elt, Function transform) { - Op beforeOp = Algebra.compile(elt); - Op afterOp = transform.apply(beforeOp); - Query query = OpAsQuery.asQuery(afterOp); - Element r = query.getQueryPattern(); - return r; - } - - // PatternVars only returns visible vars, this returns all mentioned vars - public static Set getVarsMentioned(Element e) { - NodeTransformCollectNodes tmp = new NodeTransformCollectNodes(); + + + public static Element applyOpTransform(Element elt, Function transform) { + Op beforeOp = Algebra.compile(elt); + Op afterOp = transform.apply(beforeOp); + Query query = OpAsQuery.asQuery(afterOp); + Element r = query.getQueryPattern(); + return r; + } + + // PatternVars only returns visible vars, this returns all mentioned vars + public static Set getVarsMentioned(Element e) { + NodeTransformCollectNodes tmp = new NodeTransformCollectNodes(); ElementUtils.applyNodeTransform(e, tmp); - Set nodes = tmp.getNodes(); - Set result = nodes.stream() - .filter(Node::isVariable) - .map(n -> (Var)n) - .collect(Collectors.toSet()); - - return result; - } - - - public static ElementTriplesBlock createElementTriple(Triple ... triples) { - return createElementTriple(Arrays.asList(triples)); - } - - public static ElementTriplesBlock createElementTriple(Iterable triples) { - BasicPattern bgp = new BasicPattern(); - triples.forEach(bgp::add); - ElementTriplesBlock result = new ElementTriplesBlock(bgp); - return result; - } - - public static ElementTriplesBlock createElementTriple(Node s, Node p, Node o) { - return createElement(new Triple(s, p, o)); - } - - - - public static ElementPathBlock createElementPath(Node s, Path p, Node o) { - ElementPathBlock result = createElementPath(new TriplePath(s, p, o)); - return result; - } - - public static ElementPathBlock createElementPath(TriplePath ... tps) { - ElementPathBlock result = createElementPath(Arrays.asList(tps)); - return result; - } - - public static ElementPathBlock createElementPath(Iterable it) { - ElementPathBlock result = new ElementPathBlock(); - for(TriplePath tp : it) { - result.addTriple(tp); - } - return result; - } + Set nodes = tmp.getNodes(); + Set result = nodes.stream() + .filter(Node::isVariable) + .map(n -> (Var)n) + .collect(Collectors.toSet()); + + return result; + } + + + public static ElementTriplesBlock createElementTriple(Triple ... 
triples) { + return createElementTriple(Arrays.asList(triples)); + } + + public static ElementTriplesBlock createElementTriple(Iterable triples) { + BasicPattern bgp = new BasicPattern(); + triples.forEach(bgp::add); + ElementTriplesBlock result = new ElementTriplesBlock(bgp); + return result; + } + + public static ElementTriplesBlock createElementTriple(Node s, Node p, Node o) { + return createElement(new Triple(s, p, o)); + } + + + + public static ElementPathBlock createElementPath(Node s, Path p, Node o) { + ElementPathBlock result = createElementPath(new TriplePath(s, p, o)); + return result; + } + + public static ElementPathBlock createElementPath(TriplePath ... tps) { + ElementPathBlock result = createElementPath(Arrays.asList(tps)); + return result; + } + + public static ElementPathBlock createElementPath(Iterable it) { + ElementPathBlock result = new ElementPathBlock(); + for(TriplePath tp : it) { + result.addTriple(tp); + } + return result; + } public static ElementTriplesBlock createElement(Triple triple) { BasicPattern bgp = new BasicPattern(); @@ -244,10 +244,10 @@ public static Element toElement(Collection elements) { } public static Element unionIfNeeded(Element ... elements) { - Element result = unionIfNeeded(Arrays.asList(elements)); - return result; + Element result = unionIfNeeded(Arrays.asList(elements)); + return result; } - + public static Element unionIfNeeded(Collection elements) { Element result; if(elements.size() == 1) { @@ -305,7 +305,7 @@ public static Element createRenamedElement(Element element, Map streamNodes(Graph graph) { - return stream(graph) - .flatMap(TripleUtils::streamNodes); - } - - public static Stream stream(Graph graph) { - ExtendedIterator it = graph.find(); - return Streams.stream(it).onClose(it::close); - } + + public static Stream streamNodes(Graph graph) { + return stream(graph) + .flatMap(TripleUtils::streamNodes); + } + + /** + * Remove all unused prefixes from the given graph's prefix mapping. + * Scans all triples in the graph. + * + * @param graph The graph whose prefix mapping to optimize + * @return The given graph + */ + public static Graph optimizePrefixes(Graph graph) { + PrefixMapping pm = graph.getPrefixMapping(); + PrefixMapping usedPrefixes = new PrefixMappingImpl(); + try(Stream nodeStream = streamNodes(graph)) { + PrefixUtils.usedPrefixes(pm, nodeStream, usedPrefixes); + } + pm.clearNsPrefixMap(); + pm.setNsPrefixes(usedPrefixes); + + return graph; + } + + public static Stream stream(Graph graph) { + ExtendedIterator it = graph.find(); + return Streams.stream(it).onClose(it::close); + } /** * Small convenience wrapper for default-loading of an RDF resource, @@ -89,73 +110,73 @@ public static Map indexBySubject(Iterator it) { return result; } - - public static boolean isValid(Triple t) { - boolean result; - try { - String str = NodeFmtLib.str(t) + " ."; - NTripleUtils.parseNTriplesString(str); - result = true; - } catch(Exception e) { - result = false; - } - return result; - } - - /** - * Fix for an issue we observed in some HDT files: - * This method fixes triples that have a graph component in the object position by discarding - the graph from that object.
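A round-trip sketch for optimizePrefixes (file name hypothetical):

    import org.apache.jena.rdf.model.Model;
    import org.apache.jena.riot.RDFDataMgr;
    import org.apache.jena.riot.RDFFormat;

    Model model = RDFDataMgr.loadModel("data.ttl");  // input with a bloated prefix map
    GraphUtils.optimizePrefixes(model.getGraph());   // keeps only prefixes some triple uses
    RDFDataMgr.write(System.out, model, RDFFormat.TURTLE_PRETTY);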
- * - * The introduced overhead is a factor of a bit more than 3; - * Tested on iterating the 1 million triples of a corrupted HDT file: - * - * Plain HDT: 1777ms ~ 1.5sec + + public static boolean isValid(Triple t) { + boolean result; + try { + String str = NodeFmtLib.str(t) + " ."; + NTripleUtils.parseNTriplesString(str); + result = true; + } catch(Exception e) { + result = false; + } + return result; + } + + /** + * Fix for an issue we observed in some HDT files: + * This method fixes triples that have a graph component in the object position by discarding + * the graph from that object. + * + * The introduced overhead is a factor of a bit more than 3; + * Tested on iterating the 1 million triples of a corrupted HDT file: + * + * Plain HDT: 1777ms ~ 1.5sec * Fixed HDT: 4922ms ~ 5sec - * - * @param t - * @return - */ - public static Triple fixTripleWithGraphInObject(Triple t) { - - // Only fix the object, therefore use short strings in s and p position - // to speed up re-parsing - try { - StringBuilder sb = new StringBuilder(); - sb.append(" "); - sb.append(NodeFmtLib.str(t.getObject())); - sb.append(" ."); - String str = sb.toString(); - Quad q = NTripleUtils.parseNQuadsString(str); - Triple r = new Triple(t.getSubject(), t.getPredicate(), q.getObject()); - return r; - } catch(Exception e) { - return null; - } - } + * + * @param t + * @return + */ + public static Triple fixTripleWithGraphInObject(Triple t) { + + // Only fix the object, therefore use short strings in s and p position + // to speed up re-parsing + try { + StringBuilder sb = new StringBuilder(); + sb.append(" "); + sb.append(NodeFmtLib.str(t.getObject())); + sb.append(" ."); + String str = sb.toString(); + Quad q = NTripleUtils.parseNQuadsString(str); + Triple r = new Triple(t.getSubject(), t.getPredicate(), q.getObject()); + return r; + } catch(Exception e) { + return null; + } + } // public static Graph wrapWithNtripleParse(Graph base) { // return new GraphWrapperTransform(base, it -> it.filterKeep(GraphUtils::isValid)); // } - /** - * We encountered HDT files that contained quads although the Java API - * treated them as triples. - * This wrapper causes all triples to be serialized as quads and then subsequently re-parsed. - * Only the triple component is then returned. - * - * - * @param base - * @return - */ - public static Graph wrapGraphWithNQuadsFix(Graph base) { - return new GraphWrapperTransform(base, it -> it - .mapWith(GraphUtils::fixTripleWithGraphInObject) - .filterKeep(Objects::nonNull)); - } - - public static Graph wrapWithValidation(Graph base) { - return new GraphWrapperTransform(base, it -> it - .filterKeep(GraphUtils::isValid)); - } + /** + * We encountered HDT files that contained quads although the Java API + * treated them as triples. + * This wrapper causes all triples to be serialized as quads and then subsequently re-parsed. + * Only the triple component is then returned. 
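A usage sketch for the two wrappers introduced here (the raw graph is a placeholder standing in for one backed by a corrupted HDT file):

    import org.apache.jena.graph.Graph;
    import org.apache.jena.sparql.graph.GraphFactory;

    Graph raw = GraphFactory.createDefaultGraph(); // placeholder for an HDT-backed graph
    Graph quadFixed = GraphUtils.wrapGraphWithNQuadsFix(raw);   // strips stray graph components
    Graph validated = GraphUtils.wrapWithValidation(quadFixed); // drops triples that fail to re-parse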
+ * + * + * @param base + * @return + */ + public static Graph wrapGraphWithNQuadsFix(Graph base) { + return new GraphWrapperTransform(base, it -> it + .mapWith(GraphUtils::fixTripleWithGraphInObject) + .filterKeep(Objects::nonNull)); + } + + public static Graph wrapWithValidation(Graph base) { + return new GraphWrapperTransform(base, it -> it + .filterKeep(GraphUtils::isValid)); + } } diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ModelUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ModelUtils.java index 8bebd7f7c..a13db7939 100644 --- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ModelUtils.java +++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ModelUtils.java @@ -18,8 +18,6 @@ import org.aksw.commons.collections.MultiMaps; import org.aksw.commons.util.strings.StringUtils; import org.apache.jena.graph.Graph; -import org.apache.jena.graph.Node; -import org.apache.jena.graph.Triple; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Property; @@ -28,7 +26,6 @@ import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.Statement; import org.apache.jena.rdf.model.StmtIterator; -import org.apache.jena.shared.PrefixMapping; import org.apache.jena.vocabulary.RDFS; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,7 +35,18 @@ */ public class ModelUtils { - + /** + * Remove all unused prefixes from the given model's prefix mapping. + * Assumes that the model delegates to the prefix mapping of an underlying {@link Graph}. + * + * @param model The model whose prefix mapping to optimize + * @return The given model + */ + public static Model optimizePrefixes(Model model) { + GraphUtils.optimizePrefixes(model.getGraph()); + return model; + } + /** * Extracts a mapping childClass -> parentClass from a given Model. * You can use TransitiveClosure.transitiveClosure for "inferring" the whole hierarchy. @@ -289,18 +297,18 @@ public static String[] decompose(String uri, NavigableMap prefix prefix = candidatePrefix; name = uri.substring(splitIdx); } - + return new String[]{prefix, name}; } - public static String prettyUri(String uri, NavigableMap prefixMap) - { - String[] tmp = decompose(uri, prefixMap); + public static String prettyUri(String uri, NavigableMap prefixMap) + { + String[] tmp = decompose(uri, prefixMap); - String result = (tmp[0].isEmpty()) - ? StringUtils.urlDecode(tmp[1]) - : tmp[0] + ":" + StringUtils.urlDecode(tmp[1]); + String result = (tmp[0].isEmpty()) + ?
StringUtils.urlDecode(tmp[1]) + : tmp[0] + ":" + StringUtils.urlDecode(tmp[1]); - return result; - } + return result; + } } diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/NodeUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/NodeUtils.java index fa7cfc875..83a0bdd20 100644 --- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/NodeUtils.java +++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/NodeUtils.java @@ -3,6 +3,7 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Objects; import java.util.Map.Entry; import java.util.Set; import java.util.function.Function; @@ -14,88 +15,123 @@ import org.apache.jena.ext.com.google.common.collect.Maps; import org.apache.jena.graph.Node; import org.apache.jena.graph.NodeFactory; +import org.apache.jena.graph.Triple; import org.apache.jena.riot.out.NodeFormatterNT; import org.apache.jena.sparql.core.Var; import com.google.common.collect.Iterables; public class NodeUtils { - - public static final String nullUri = "http://null.null/null"; - public static final Node nullUriNode = NodeFactory.createURI(nullUri); - // Prefix for URIs referring to environment variables - public static final String ENV_PREFIX = "env:"; + public static final String nullUri = "http://null.null/null"; + public static final Node nullUriNode = NodeFactory.createURI(nullUri); + + // Prefix for URIs referring to environment variables + public static final String ENV_PREFIX = "env:"; // public static final Node N_ABSENT = NodeFactory.createURI("http://special.absent/none"); - // Note to myself because I repeatedly added node/prefix utils here: - // Prefix / PrefixMapping related utils are in PrefixUtils ~ Claus - - public static boolean isEnvKey(Node node) { - boolean result = getEnvKey(node) != null; - return result; - } - - // Return key + flag for string/iri - public static Entry getEnvKey(Node node) { - Entry result = null; - if(node.isURI()) { - String str = node.getURI(); - if(str.startsWith(ENV_PREFIX)) { - String key = str.substring(ENV_PREFIX.length()); - - boolean isIri = false; - if(key.startsWith("//")) { - key = key.substring(2); - isIri = true; - } - - result = Maps.immutableEntry(key, isIri); - } - } - - return result; - } - - - public static Node substWithLookup2(Node node, Function lookup) { - Entry e = getEnvKey(node); - - Node result = node; - if(e != null) { - String key = e.getKey(); - boolean isUri = e.getValue(); - Node value = lookup.apply(key); - if(value != null) { - result = isUri - ? NodeFactory.createURI(value.toString()) - : value; // NodeFactory.createLiteral(value); - } - } - - return result; - } - - public static Node substWithLookup(Node node, Function lookup) { - - Entry e = getEnvKey(node); - - Node result = node; - if(e != null) { - String key = e.getKey(); - boolean isUri = e.getValue(); - String value = lookup.apply(key); - if(value != null) { - result = isUri - ? 
NodeFactory.createURI(value) - : NodeFactory.createLiteral(value); - } - - } - - return result; - + // Note to myself because I repeatedly added node/prefix utils here: + // Prefix / PrefixMapping related utils are in PrefixUtils ~ Claus + + public static boolean isEnvKey(Node node) { + boolean result = getEnvKey(node) != null; + return result; + } + + public static boolean isNullOrAny(Node node) { + return node == null || Node.ANY.equals(node); + } + + + /** This method is unfortunately private in {@link Triple} at least in jena 3.16 */ + public static Node nullToAny(Node n) { + return n == null ? Node.ANY : n; + } + + /** + * Create a logical conjunction of two nodes: + * - Node.ANY or null matches everything + * - If any argument matches everything return the other argument (convert null to ANY) + * - if both arguments are concrete nodes then return one if them if they are equal + * - otherwise return null + * + */ + public static Node logicalAnd(Node a, Node b) { + Node result = NodeUtils.isNullOrAny(a) + ? nullToAny(b) + : NodeUtils.isNullOrAny(b) || Objects.equals(a, b) + ? nullToAny(a) + : null; + + return result; + } + + /** + * Return a pair (key, flag for string (false)/iri(true)) for nodes that reference + * environment variables - null otherwise. + * + * @param node + * @return + */ + public static Entry getEnvKey(Node node) { + Entry result = null; + if(node.isURI()) { + String str = node.getURI(); + if(str.startsWith(ENV_PREFIX)) { + String key = str.substring(ENV_PREFIX.length()); + + boolean isIri = false; + if(key.startsWith("//")) { + key = key.substring(2); + isIri = true; + } + + result = Maps.immutableEntry(key, isIri); + } + } + + return result; + } + + + public static Node substWithLookup2(Node node, Function lookup) { + Entry e = getEnvKey(node); + + Node result = node; + if(e != null) { + String key = e.getKey(); + boolean isUri = e.getValue(); + Node value = lookup.apply(key); + if(value != null) { + result = isUri + ? NodeFactory.createURI(value.toString()) + : value; // NodeFactory.createLiteral(value); + } + } + + return result; + } + + public static Node substWithLookup(Node node, Function lookup) { + + Entry e = getEnvKey(node); + + Node result = node; + if(e != null) { + String key = e.getKey(); + boolean isUri = e.getValue(); + String value = lookup.apply(key); + if(value != null) { + result = isUri + ? NodeFactory.createURI(value) + : NodeFactory.createLiteral(value); + } + + } + + return result; + // Node result = node; // if(node.isURI()) { // String str = node.getURI(); @@ -108,7 +144,7 @@ public static Node substWithLookup(Node node, Function lookup) { // isUri = true; // } // -// +// // String value = lookup.apply(key); // if(!Strings.isNullOrEmpty(value)) { // result = isUri @@ -117,10 +153,22 @@ public static Node substWithLookup(Node node, Function lookup) { // } // } // } -// +// // return result; - } - + } + + /** + * Return the language of a node or null if the argument is not applicable + * + * @param node + * @return + */ + public static String getLang(Node node) { + String result = node != null && node.isLiteral() ? node.getLiteralLanguage() : null; + return result; + } + + public static Node asNullableNode(String uri) { Node result = uri == null ? 
null : NodeFactory.createURI(uri); return result; @@ -175,11 +223,11 @@ public static String toNTriplesString(Node node) { String result = writer.toString(); return result; } - - + + @Deprecated public static String toNTriplesStringOld(Node node) { - + String result; if(node.isURI()) { result = "<" + node.getURI() + ">"; diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/QueryUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/QueryUtils.java index 044b51411..e2a8bebc6 100644 --- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/QueryUtils.java +++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/QueryUtils.java @@ -1,5 +1,7 @@ package org.aksw.jena_sparql_api.utils; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -38,6 +40,7 @@ import org.apache.jena.sparql.modify.request.QuadAcc; import org.apache.jena.sparql.syntax.Element; import org.apache.jena.sparql.syntax.ElementFilter; +import org.apache.jena.sparql.syntax.ElementGroup; import org.apache.jena.sparql.syntax.ElementNamedGraph; import org.apache.jena.sparql.syntax.ElementSubQuery; import org.apache.jena.sparql.syntax.ElementVisitorBase; @@ -49,6 +52,7 @@ import org.apache.jena.sparql.util.ExprUtils; import org.apache.jena.sparql.util.PrefixMapping2; +import com.google.common.collect.BoundType; import com.google.common.collect.DiscreteDomain; import com.google.common.collect.Range; @@ -190,6 +194,52 @@ public static Query restoreQueryForm(Query query, Query proto) { return result; } + + /** + * Combine multiple construct queries into a single query whose + * template and query pattern is the union of those of the provided queries + * This method does NOT perform any renaming of variables. + * + * + * @param queries + * @return + */ + public static Query unionConstruct(Query ... queries) { + return unionConstruct(Arrays.asList(queries)); + } + + /** + * Combine multiple construct queries into a single query whose + * template and query pattern is the union of those of the provided queries + * This method does NOT perform any renaming of variables. + * + * + * @param queries + * @return + */ + public static Query unionConstruct(Iterable queries) { + Query result = new Query(); + + // BasicPatten bgp = new BasicPattern(); + Set quadPatterns = new LinkedHashSet<>(); + Set elements = new LinkedHashSet<>(); + + for (Query query : queries) { + result.getPrefixMapping().setNsPrefixes(query.getPrefixMapping()); + + Template tmp = query.getConstructTemplate(); + + quadPatterns.addAll(tmp.getQuads()); + elements.add(query.getQueryPattern()); + } + + result.setQueryConstructType(); + result.setConstructTemplate(new Template(new QuadAcc(new ArrayList<>(quadPatterns)))); + result.setQueryPattern(ElementUtils.unionIfNeeded(elements)); + + return result; + } + // Create a construct query from a select query and a template public static Query selectToConstruct(Query query, Template template) { Query result = new Query(); @@ -533,6 +583,31 @@ public static Op applyRange(Op op, Range range) { return result; } + + /** + * Transform a range w.r.t. a discrete domain such that any lower bound is closed and the upper bound + * is open. 
+     * As a result, a zero-length range is represented by [x..x).
+     *
+     * @param <T>
+     * @param range
+     * @param domain
+     * @return
+     */
+    public static <T extends Comparable<T>> Range<T> makeClosedOpen(Range<T> range, DiscreteDomain<T> domain) {
+        T lower = closedLowerEndpointOrNull(range, domain);
+        T upper = openUpperEndpointOrNull(range, domain);
+
+        Range<T> result = lower == null
+            ? upper == null
+                ? Range.all()
+                : Range.upTo(upper, BoundType.OPEN)
+            : upper == null
+                ? Range.atLeast(lower)
+                : Range.closedOpen(lower, upper);
+
+        return result;
+    }
+
     /**
      * Limit the query to the given range, relative to its own given range
     *
@@ -586,10 +661,33 @@ public static Range<Long> createRange(Long limit, Long offset) {
     //public static LimitAndOffset rangeToLimitAndOffset(Range range)
 
+    public static <T extends Comparable<T>> T closedLowerEndpointOrNull(Range<T> range, DiscreteDomain<T> domain) {
+        T result = !range.hasLowerBound()
+            ? null
+            : range.lowerBoundType().equals(BoundType.CLOSED)
+                ? range.lowerEndpoint()
+                : domain.next(range.lowerEndpoint());
+
+        return result;
+    }
+
+    public static <T extends Comparable<T>> T openUpperEndpointOrNull(Range<T> range, DiscreteDomain<T> domain) {
+        T result = !range.hasUpperBound()
+            ? null
+            : range.upperBoundType().equals(BoundType.CLOSED)
+                ? domain.next(range.upperEndpoint())
+                : range.upperEndpoint();
+
+        return result;
+    }
+
+
     public static long rangeToOffset(Range<Long> range) {
-        long result = range == null || !range.hasLowerBound() ? 0 : range.lowerEndpoint();
+        Long tmp = range == null
+            ? null
+            : closedLowerEndpointOrNull(range, DiscreteDomain.longs());
-        result = result == 0 ? Query.NOLIMIT : result;
+        long result = tmp == null || tmp == 0 ? Query.NOLIMIT : tmp;
         return result;
     }
@@ -599,11 +697,13 @@ public static long rangeToOffset(Range<Long> range) {
      * @return
      */
     public static long rangeToLimit(Range<Long> range) {
-        range = range == null ? null : range.canonical(DiscreteDomain.longs());
+        range = range == null ? null : makeClosedOpen(range, DiscreteDomain.longs());
         long result = range == null || !range.hasUpperBound()
             ? Query.NOLIMIT
-            : DiscreteDomain.longs().distance(range.lowerEndpoint(), range.upperEndpoint());
+            : DiscreteDomain.longs().distance(range.lowerEndpoint(), range.upperEndpoint())
+                // If the upper bound is closed such as [x, x] then the result is the distance plus 1
+                + (range.upperBoundType().equals(BoundType.CLOSED) ? 1 : 0);
 
         return result;
     }
@@ -625,20 +725,31 @@ public static Range<Long> toRange(Long offset, Long limit) {
         return result;
     }
 
-    public static Range<Long> subRange(Range<Long> parent, Range<Long> child) {
+    /**
+     * Returns the absolute range for a child range given relative to a parent range.
+     * Assumes that both ranges have a lower endpoint.
+     *
+     * @param _parent
+     * @param _child
+     * @return
+     */
+    public static Range<Long> subRange(Range<Long> _parent, Range<Long> _child) {
+        Range<Long> parent = makeClosedOpen(_parent, DiscreteDomain.longs());
+        Range<Long> child = makeClosedOpen(_child, DiscreteDomain.longs());
+
+        long newMin = parent.lowerEndpoint() + child.lowerEndpoint();
         Long newMax = (parent.hasUpperBound()
             ? child.hasUpperBound()
-                ? (Long)Math.min(parent.upperEndpoint(), child.upperEndpoint())
+                ? (Long)Math.min(parent.upperEndpoint(), newMin + child.upperEndpoint())
                 : parent.upperEndpoint()
             : child.hasUpperBound()
-                ? (Long)child.upperEndpoint()
+                ? newMin + (Long)child.upperEndpoint()
                 : null);
 
         Range<Long> result = newMax == null
             ? Range.atLeast(newMin)
-            : Range.closed(newMin, newMax);
+            : Range.closedOpen(newMin, newMax);
 
         return result;
     }
@@ -691,7 +802,12 @@ public static Query elementToQuery(Element pattern, String resultVar) {
         if (pattern == null)
             return null;
         Query query = new Query();
-        query.setQueryPattern(pattern);
+
+        Element cleanElement = pattern instanceof ElementGroup || pattern instanceof ElementSubQuery
+            ? pattern
+            : ElementUtils.createElementGroup(pattern);
+
+        query.setQueryPattern(cleanElement);
         query.setQuerySelectType();
 
         if (resultVar == null) {
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ResultSetUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ResultSetUtils.java
index e695c693a..3f5779e0c 100644
--- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ResultSetUtils.java
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/ResultSetUtils.java
@@ -6,6 +6,7 @@
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -19,9 +20,12 @@
 import org.apache.jena.query.ResultSet;
 import org.apache.jena.query.ResultSetFactory;
 import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.sparql.algebra.Table;
+import org.apache.jena.sparql.algebra.TableFactory;
 import org.apache.jena.sparql.core.Var;
 import org.apache.jena.sparql.engine.QueryIterator;
 import org.apache.jena.sparql.engine.binding.Binding;
+import org.apache.jena.sparql.engine.binding.BindingFactory;
 import org.apache.jena.sparql.engine.iterator.QueryIterPlainWrapper;
 import org.apache.jena.sparql.expr.NodeValue;
 import org.apache.jena.util.iterator.ExtendedIterator;
@@ -36,6 +40,18 @@
 public class ResultSetUtils {
+    /** Materialize a {@link ResultSet} into a {@link Table} */
+    public static Table resultSetToTable(ResultSet rs) {
+        List<Var> vars = Var.varList(rs.getResultVars());
+        Table result = TableFactory.create(vars);
+        while (rs.hasNext()) {
+            Binding b = BindingFactory.copy(rs.nextBinding());
+            result.addBinding(b);
+        }
+
+        return result;
+    }
+
     public static Multiset<QuerySolution> toMultisetQs(ResultSet rs) {
         Multiset<QuerySolution> result = HashMultiset.create();
         while(rs.hasNext()) {
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/SetFromGraph.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/SetFromGraph.java
index 11c1ae948..e7679b5c7 100644
--- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/SetFromGraph.java
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/SetFromGraph.java
@@ -1,24 +1,39 @@
 package org.aksw.jena_sparql_api.utils;
 
 import java.util.AbstractSet;
+import java.util.Objects;
 
 import org.apache.jena.graph.Graph;
-import org.apache.jena.graph.Node;
 import org.apache.jena.graph.Triple;
 import org.apache.jena.util.iterator.ExtendedIterator;
 
 public class SetFromGraph
     extends AbstractSet<Triple>
 {
-    private Graph graph;
+    protected Graph graph;
 
     public SetFromGraph(Graph graph) {
-        this.graph = graph;
+        this.graph = Objects.requireNonNull(graph);
+    }
+
+    public Graph getGraph() {
+        return graph;
     }
 
     @Override
     public ExtendedIterator<Triple> iterator() {
-        ExtendedIterator<Triple> result = graph.find(Node.ANY, Node.ANY, Node.ANY);
+        ExtendedIterator<Triple> result = graph.find();
+        return result;
+    }
+
+    @Override
+    public boolean contains(Object o) {
+        boolean result = false;
+        if(o instanceof Triple) {
+            Triple t = (Triple)o;
+            result = graph.contains(t);
+        }
+        return result;
+    }
 }
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/TripleUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/TripleUtils.java
index eea36ce0d..77fee2df2 100644
--- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/TripleUtils.java
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/TripleUtils.java
@@ -14,12 +14,33 @@
 import org.apache.jena.riot.writer.NTriplesWriter;
 import org.apache.jena.sparql.engine.binding.Binding;
 import org.apache.jena.sparql.engine.binding.BindingHashMap;
+import org.apache.jena.sparql.path.P_Path0;
 
 public class TripleUtils {
-    public static Stream<Node> streamNodes(Triple t) {
-        return Stream.of(t.getSubject(), t.getPredicate(), t.getObject());
-    }
+    public static Stream<Node> streamNodes(Triple t) {
+        return Stream.of(t.getSubject(), t.getPredicate(), t.getObject());
+    }
+
+
+    /**
+     * Create a logical conjunction of two triple patterns.
+     *
+     * @param a
+     * @param b
+     * @return
+     */
+    public static Triple logicalAnd(Triple a, Triple b) {
+        Node s = NodeUtils.logicalAnd(a.getMatchSubject(), b.getMatchSubject());
+        Node p = NodeUtils.logicalAnd(a.getMatchPredicate(), b.getMatchPredicate());
+        Node o = NodeUtils.logicalAnd(a.getMatchObject(), b.getMatchObject());
+
+        Triple result = s == null || p == null || o == null
+            ? null
+            : Triple.createMatch(s, p, o);
+
+        return result;
+    }
 
 // public static Multimap<Node, Triple> indexBySubject(Iterable<Triple> triples) {
 //     Multimap<Node, Triple> result = indexBySubject(triples.iterator());
@@ -38,13 +59,52 @@ public static Stream<Node> streamNodes(Triple t) {
 //     return result;
 // }
 
-    public static Triple create(Node s, Node p, Node o, boolean swapSO) {
-        Triple result = swapSO
-            ? new Triple(o, p, s)
-            : new Triple(s, p, o);
-
-        return result;
-    }
+    /**
+     * If isForward is true then return the triple's subject, otherwise its object.
+     */
+    public static Node getSource(Triple triple, boolean isForward) {
+        return isForward ? triple.getSubject() : triple.getObject();
+    }
+
+    /**
+     * If isForward is true then return the triple's object, otherwise its subject.
+     */
+    public static Node getTarget(Triple triple, boolean isForward) {
+        return isForward ? triple.getObject() : triple.getSubject();
+    }
+
+    /**
+     * Create a matcher for triples having a certain predicate and a source node.
+     * If 'isForward' is true then the subject acts as the source, otherwise it's the object.
+     */
+    public static Triple createMatch(Node source, P_Path0 predicate) {
+        return createMatch(source, predicate.getNode(), predicate.isForward());
+    }
+
+    /**
+     * Create a matcher for triples having a certain predicate and a source node.
+     * If 'isForward' is true then the subject acts as the source, otherwise it's the object.
+     */
+    public static Triple createMatch(Node source, Node predicate, boolean isForward) {
+        Triple result = isForward
+            ? Triple.createMatch(source, predicate, Node.ANY)
+            : Triple.createMatch(Node.ANY, predicate, source);
+
+        return result;
+    }
+
+    public static Triple create(Node s, P_Path0 p, Node o) {
+        Triple result = create(s, p.getNode(), o, p.isForward());
+        return result;
+    }
+
+    public static Triple create(Node s, Node p, Node o, boolean isForward) {
+        Triple result = isForward
+            ? new Triple(s, p, o)
+            : new Triple(o, p, s);
+
+        return result;
+    }
 
     public static Node[] toArray(Triple t) {
         Node[] result = new Node[] { t.getSubject(), t.getPredicate(), t.getObject() };
@@ -110,17 +170,17 @@ public static String md5sum(Triple triple) {
         return result;
     }
 
-    public static Triple listToTriple(List<Node> nodes) {
-        return new Triple(nodes.get(0), nodes.get(1), nodes.get(2));
-    }
+    public static Triple listToTriple(List<Node> nodes) {
+        return new Triple(nodes.get(0), nodes.get(1), nodes.get(2));
+    }
+
+    public static List<Node> tripleToList(Triple triple)
+    {
+        List<Node> result = new ArrayList<Node>();
+        result.add(triple.getSubject());
+        result.add(triple.getPredicate());
+        result.add(triple.getObject());
 
-    public static List<Node> tripleToList(Triple triple)
-    {
-        List<Node> result = new ArrayList<Node>();
-        result.add(triple.getSubject());
-        result.add(triple.getPredicate());
-        result.add(triple.getObject());
-
-        return result;
-    }
+        return result;
+    }
 }
\ No newline at end of file
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/UriUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/UriUtils.java
index dda915e89..b601e9a85 100644
--- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/UriUtils.java
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/UriUtils.java
@@ -9,7 +9,6 @@
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.util.stream.Collectors;
 
 import org.apache.http.NameValuePair;
 import org.apache.http.client.utils.URLEncodedUtils;
@@ -29,22 +28,24 @@
  */
 public class UriUtils {
+    /**
+     * See {@link #replaceNamespace}
+     */
     public static final Pattern replaceNamespacePattern = Pattern.compile("(?<=/)[^/]+(?=/[^/]+/*$)");
-
+
     /**
      * Only retains first value
      * @return
      */
     public static Map<String, String> createMapFromUriQueryString(URI uri) {
-        List<NameValuePair> nm = URLEncodedUtils.parse(uri, StandardCharsets.UTF_8);
-        Map<String, String> result = nm.stream()
-            .collect(Collectors.toMap(
-                NameValuePair::getName,
-                NameValuePair::getValue,
-                (u, v) -> u, LinkedHashMap::new));
-
-        return result;
+        List<NameValuePair> pairs = URLEncodedUtils.parse(uri, StandardCharsets.UTF_8);
+        Map<String, String> result = new LinkedHashMap<>();
+        for (NameValuePair pair : pairs) {
+            result.putIfAbsent(pair.getName(), pair.getValue());
+        }
+
+        return result;
     }
 
     /**
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/VarExprListUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/VarExprListUtils.java
index 1b52766e4..fd357db63 100644
--- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/VarExprListUtils.java
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/VarExprListUtils.java
@@ -26,6 +26,24 @@
 public class VarExprListUtils {
+    /**
+     * Add variables to a {@link VarExprList} if they are not already present in it
+     *
+     * @param varExprList
+     * @param vars
+     * @return
+     */
+    public static VarExprList addAbsentVars(VarExprList varExprList, Collection<Var> vars) {
+        for (Var var : vars) {
+            if (!varExprList.contains(var)) {
+                varExprList.add(var);
+            }
+        }
+
+        return varExprList;
+    }
+
+
     /**
      * Invert the mapping of a var expr list and return it as a jena-shaded guava Multimap.
      * ExprVars are generated for each variable that is otherwise mapped to null.
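A quick illustration of the addAbsentVars helper above (a sketch for this review, not part of the patch; the variable names are made up):

import java.util.Arrays;

import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.core.VarExprList;
import org.apache.jena.sparql.expr.ExprVar;

VarExprList vel = new VarExprList();
vel.add(Var.alloc("s"), new ExprVar(Var.alloc("x")));

// ?s already has an expression and is left untouched; only ?p is appended as a plain variable
VarExprListUtils.addAbsentVars(vel, Arrays.asList(Var.alloc("s"), Var.alloc("p")));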
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/VarUtils.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/VarUtils.java
index 08e56bca0..298529715 100644
--- a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/VarUtils.java
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/VarUtils.java
@@ -15,13 +15,13 @@
 import org.apache.jena.sparql.graph.NodeTransform;
 
 public class VarUtils {
-    public static final Pattern VARNAME = Pattern.compile("(\\?|\\$)?(?<varName>\\S*)");
+    public static final Pattern VARNAME = Pattern.compile("(\\?|\\$)?(?<varName>\\S*)");
 
-    /**
-     * Parse the patterns (?|$)\\S* as variables
-     *
-     * @return
-     */
+    /**
+     * Parse the patterns (?|$)\\S* as variables
+     *
+     * @return
+     */
 // public static Var parseVar(String str) {
 //     Matcher m = VARNAME.matcher(str);
 //     String s = m.find()
@@ -33,33 +33,33 @@ public class VarUtils {
 // }
 
-    /**
-     * This method parses the string generated by Map.toString() back into the Java object.
-     * Variable names must not contain symbols '=' and ','
-     *
-     * @param str
-     * @return
-     */
-    public static Map<Var, Var> parseVarMap(String str) {
-        Map<Var, Var> result = new HashMap<>();
+    /**
+     * This method parses the string generated by Map.toString() back into the Java object.
+     * Variable names must not contain the symbols '=' and ','
+     *
+     * @param str
+     * @return
+     */
+    public static Map<Var, Var> parseVarMap(String str) {
+        Map<Var, Var> result = new HashMap<>();
 
-        int l = str.length();
+        int l = str.length();
 
-        int start = str.startsWith("{") ? 1 : 0;
-        int end = str.endsWith("}") ? l - 1 : l;
+        int start = str.startsWith("{") ? 1 : 0;
+        int end = str.endsWith("}") ? l - 1 : l;
 
-        String sub = str.substring(start, end);
-        String[] entries = sub.split(",");
+        String sub = str.substring(start, end);
+        String[] entries = sub.split(",");
 
-        for(String entry : entries) {
-            String[] kv = entry.split("=", 2);
-            Var k = parseVar(kv[0]);
-            Var v = parseVar(kv[1]);
-            result.put(k, v);
-        }
+        for(String entry : entries) {
+            String[] kv = entry.split("=", 2);
+            Var k = parseVar(kv[0]);
+            Var v = parseVar(kv[1]);
+            result.put(k, v);
+        }
 
-        return result;
-    }
+        return result;
+    }
 
     public static Var parseVar(String str) {
         // Remove leading ? of the varName
@@ -164,15 +223,11 @@ public static Var applyNodeTransform(Var var, NodeTransform nodeTransform, Var d
      * @param excludeSymmetry if true, exclude mappings from a var in vbs to itself.
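      *                        (i.e. an identity entry such as ?x -> ?x is omitted from the result)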
      */
     public static Map<Var, Var> createDistinctVarMap(Collection<Var> vas, Collection<Var> vbs, boolean excludeSymmetry, Generator<Var> generator) {
-        //var vans = vas.map(VarUtils.getVarName);
-
-        if (generator == null) {
-            Set<Var> forbidden = new HashSet<>();
-            forbidden.addAll(vas);
-            forbidden.addAll(vbs);
-            generator = VarGeneratorBlacklist.create(forbidden); //vas);
-            //new VarGeneratorBlacklist(new VarGeneratorImpl(Gensym.create("v")), vas);
-        }
+
+        // Ensure that the generator does not yield a forbidden variable
+        Set<Var> forbidden = new HashSet<>();
+        forbidden.addAll(vas);
+        forbidden.addAll(vbs);
+        generator = VarGeneratorBlacklist.create(generator, forbidden); //vas);
 
         // Rename all variables that are in common
         Map<Var, Var> result = new HashMap<Var, Var>();
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/DatasetGraphDiff.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/DatasetGraphDiff.java
new file mode 100644
index 000000000..8a8c69f53
--- /dev/null
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/DatasetGraphDiff.java
@@ -0,0 +1,321 @@
+package org.aksw.jena_sparql_api.utils.model;
+
+import static org.apache.jena.query.ReadWrite.WRITE;
+import static org.apache.jena.system.Txn.executeWrite;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Consumer;
+
+import org.aksw.commons.collections.sets.SetIterator;
+import org.apache.jena.ext.com.google.common.collect.Iterators;
+import org.apache.jena.graph.Graph;
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.compose.Delta;
+import org.apache.jena.graph.compose.Difference;
+import org.apache.jena.query.ReadWrite;
+import org.apache.jena.query.TxnType;
+import org.apache.jena.sparql.JenaTransactionException;
+import org.apache.jena.sparql.core.DatasetGraph;
+import org.apache.jena.sparql.core.DatasetGraphBase;
+import org.apache.jena.sparql.core.DatasetGraphFactory;
+import org.apache.jena.sparql.core.GraphView;
+import org.apache.jena.sparql.core.Quad;
+import org.apache.jena.sparql.core.Transactional;
+
+
+/**
+ * A DatasetGraph that tracks insertions / deletions to a base graph in separate
+ * DatasetGraphs.
+ *
+ * Similar to {@link Delta}, which, however, operates on Graphs.
+ *
+ * Transaction support is delegated to the base dataset graph and to the
+ * internal structures that track additions and removals.
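+ *
+ * A typical flow (sketch): wrap a base dataset graph, apply adds/deletes,
+ * inspect {@code getAdded()} / {@code getRemoved()}, and eventually call
+ * {@code materialize()} to push the tracked changes into the base.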
+ *
+ *
+ * @author raven
+ *
+ */
+public class DatasetGraphDiff
+    extends DatasetGraphBase
+{
+    protected DatasetGraph base;
+
+    protected DatasetGraph added;
+    protected DatasetGraph removed;
+
+//    protected boolean allowEmptyGraphs;
+
+    protected TransactionalSet<Node> removedGraphs;
+    protected TransactionalSet<Node> addedGraphs;
+
+    protected GraphView defaultGraphViewCache = GraphView.createDefaultGraph(this);
+    protected Map<Node, GraphView> namedGraphViewCache = Collections.synchronizedMap(new HashMap<>());
+
+    public DatasetGraphDiff() {
+        this(DatasetGraphFactory.createTxnMem());
+    }
+
+    public DatasetGraphDiff(DatasetGraph base) {
+        super();
+        this.base = base;
+        this.added = DatasetGraphFactory.createTxnMem();
+        this.removed = DatasetGraphFactory.createTxnMem();
+
+//        this.allowEmptyGraphs = allowEmptyGraphs;
+
+        this.removedGraphs = new TransactionalSetImpl<>();
+        this.addedGraphs = new TransactionalSetImpl<>();
+    }
+
+
+    public DatasetGraph getBase() {
+        return base;
+    }
+
+    public DatasetGraph getAdded() {
+        return added;
+    }
+
+    public DatasetGraph getRemoved() {
+        return removed;
+    }
+
+    public Set<Node> getRemovedGraphs() {
+        return removedGraphs;
+    }
+
+    public Set<Node> getAddedGraphs() {
+        return addedGraphs;
+    }
+
+    @Override
+    public Iterator<Quad> find(Node g, Node s, Node p, Node o) {
+        Iterator<Quad> itAdded = added.find(g, s, p, o);
+
+        Iterator<Quad> result = base.find(g, s, p, o);
+
+        result = Iterators.filter(result, quad -> !removed.contains(quad));
+        result = Iterators.concat(result, itAdded);
+
+        return result;
+    }
+
+    @Override
+    public Iterator<Quad> findNG(Node g, Node s, Node p, Node o) {
+
+        Iterator<Quad> itAdded = added.findNG(g, s, p, o);
+
+        Iterator<Quad> result = base.findNG(g, s, p, o);
+        result = Iterators.filter(result, quad -> !removed.contains(quad));
+        result = Iterators.concat(result, itAdded);
+
+        return result;
+    }
+
+    public void add(Quad quad) {
+        removed.delete(quad);
+        added.add(quad);
+    }
+
+    @Override
+    public void delete(Quad quad) {
+        added.delete(quad);
+        removed.add(quad);
+    }
+
+    @Override
+    public GraphView getDefaultGraph() {
+        return defaultGraphViewCache;
+    }
+
+    @Override
+    public GraphView getGraph(Node graphNode) {
+        GraphView result = namedGraphViewCache.computeIfAbsent(graphNode,
+            n -> GraphView.createNamedGraph(this, n));
+
+        return result;
+    }
+
+
+    @Override
+    public Iterator<Node> listGraphNodes() {
+        Iterator<Node> result = base.listGraphNodes();
+        // TODO Add flag to treat empty graphs as effectively removed
+        result = Iterators.filter(result, node -> !removedGraphs.contains(node));
+
+        Set<Node> effectiveAddedGraphs = new LinkedHashSet<>(addedGraphs);
+        added.listGraphNodes().forEachRemaining(effectiveAddedGraphs::add);
+
+        result = Iterators.concat(result, effectiveAddedGraphs.iterator());
+
+
+        return result;
+    }
+
+    @Override
+    public void addGraph(Node graphNode, Graph graph) {
+        removedGraphs.remove(graphNode);
+
+        if (base.containsGraph(graphNode)) {
+            Graph existing = base.getGraph(graphNode);
+
+            added.addGraph(graphNode, new Difference(graph, existing));
+            removed.addGraph(graphNode, new Difference(existing, graph));
+
+        } else {
+            addedGraphs.add(graphNode);
+
+            removed.removeGraph(graphNode);
+            added.addGraph(graphNode, graph);
+        }
+    }
+
+    @Override
+    public void removeGraph(Node graphNode) {
+        addedGraphs.remove(graphNode);
+
+        if (base.containsGraph(graphNode)) {
+            removedGraphs.add(graphNode);
+
+            added.removeGraph(graphNode);
+
+        } else {
+            added.removeGraph(graphNode);
+
+            // Sanity check: If the graph did not exist in base then
+            // the set of removals must be empty
+            // if (!core.containsGraph(graphNode)) { assert removed.isEmpty(); }
+            // removed.removeGraph(graphNode);
+        }
+
+        added.removeGraph(graphNode);
+        removed.removeGraph(graphNode);
+    }
+
+    public void materialize() {
+        mutate(this, DatasetGraphDiff::_materialize, this);
+    }
+
+    protected void _materialize() {
+        removed.find().forEachRemaining(base::delete);
+        added.find().forEachRemaining(base::add);
+
+        added.clear();
+        addedGraphs.clear();
+        removed.clear();
+        removedGraphs.clear();
+    }
+
+    @Override
+    public boolean supportsTransactions() {
+        return true;
+    }
+
+    @Override
+    public boolean supportsTransactionAbort() {
+        return true;
+    }
+
+    @Override
+    public void abort() {
+        addedGraphs.abort();
+        removedGraphs.abort();
+        base.abort();
+        added.abort();
+        removed.abort();
+    }
+
+    @Override
+    public void begin(ReadWrite mode) {
+        base.begin(mode);
+        added.begin(mode);
+        removed.begin(mode);
+        addedGraphs.begin(mode);
+        removedGraphs.begin(mode);
+    }
+
+    @Override
+    public void commit() {
+        removedGraphs.commit();
+        addedGraphs.commit();
+        removed.commit();
+        added.commit();
+        base.commit();
+    }
+
+    @Override
+    public void end() {
+        removedGraphs.end();
+        addedGraphs.end();
+        removed.end();
+        added.end();
+        base.end();
+    }
+
+    @Override
+    public boolean isInTransaction() {
+        return base.isInTransaction();
+    }
+
+    @Override
+    public void begin(TxnType type) {
+        base.begin(type);
+        added.begin(type);
+        removed.begin(type);
+        addedGraphs.begin(type);
+        removedGraphs.begin(type);
+    }
+
+    @Override
+    public boolean promote(Promote mode) {
+        added.promote(mode);
+        removed.promote(mode);
+        addedGraphs.promote(mode);
+        removedGraphs.promote(mode);
+        boolean result = base.promote(mode);
+        return result;
+    }
+
+    @Override
+    public ReadWrite transactionMode() {
+        ReadWrite result = base.transactionMode();
+        return result;
+    }
+
+    @Override
+    public TxnType transactionType() {
+        TxnType result = base.transactionType();
+        return result;
+    }
+
+
+    public static <T> void mutate(Transactional target, final Consumer<T> mutator, final T payload) {
+        if (target.isInTransaction()) {
+            if (!target.transactionMode().equals(WRITE)) {
+                TxnType mode = target.transactionType();
+                switch (mode) {
+                case WRITE:
+                    break;
+                case READ:
+                    throw new JenaTransactionException("Tried to write inside a READ transaction!");
+                case READ_COMMITTED_PROMOTE:
+                case READ_PROMOTE:
+                    throw new RuntimeException("promotion not implemented");
+//                    boolean readCommitted = (mode == TxnType.READ_COMMITTED_PROMOTE);
+//                    promote(readCommitted);
+                    //break;
+                }
+            }
+
+            mutator.accept(payload);
+        } else executeWrite(target, () -> {
+            mutator.accept(payload);
+        });
+    }
+
+}
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/PrefixMapAdapter.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/PrefixMapAdapter.java
new file mode 100644
index 000000000..a1c28ebf5
--- /dev/null
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/PrefixMapAdapter.java
@@ -0,0 +1,104 @@
+package org.aksw.jena_sparql_api.utils.model;
+
+import java.util.Map;
+
+import org.apache.jena.atlas.lib.Pair;
+import org.apache.jena.riot.system.PrefixMap;
+import org.apache.jena.riot.system.PrefixMapBase;
+import org.apache.jena.shared.PrefixMapping;
+
+
+/**
+ * Wrapper to use a {@link PrefixMapping} in places where a {@link PrefixMap}
+ * is required.
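+ *
+ * Usage sketch (where {@code pm} is any existing prefix mapping):
+ * {@code PrefixMap prefixes = PrefixMapAdapter.wrap(pm); }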
+ *
+ * @author raven
+ *
+ */
+public class PrefixMapAdapter
+    extends PrefixMapBase
+{
+    protected PrefixMapping prefixMapping;
+
+    public PrefixMapAdapter(PrefixMapping prefixMapping) {
+        super();
+        this.prefixMapping = prefixMapping;
+    }
+
+    @Override
+    public Map<String, String> getMapping() {
+        return prefixMapping.getNsPrefixMap();
+    }
+
+    @Override
+    public void add(String prefix, String iriString) {
+        prefixMapping.setNsPrefix(prefix, iriString);
+    }
+
+    @Override
+    public void delete(String prefix) {
+        prefixMapping.removeNsPrefix(prefix);
+    }
+
+    @Override
+    public void clear() {
+        prefixMapping.clearNsPrefixMap();
+    }
+
+    @Override
+    public boolean containsPrefix(String prefix) {
+        return prefixMapping.getNsPrefixURI(prefix) != null;
+    }
+
+    @Override
+    public String abbreviate(String uriStr) {
+        return prefixMapping.shortForm(uriStr);
+    }
+
+    @Override
+    public Pair<String, String> abbrev(String uriStr) {
+        Pair<String, String> result = null;
+        String shortForm = prefixMapping.shortForm(uriStr);
+
+        // Note: The contract of shortForm forbids a null result
+        if (shortForm != uriStr) {
+            int splitPoint = shortForm.indexOf(':');
+            if (splitPoint >= 0) {
+                String prefix = shortForm.substring(0, splitPoint);
+
+                // Validate the prefix for robustness:
+                // The split may fail if a prefix already contained a colon
+                if (prefixMapping.getNsPrefixURI(prefix) != null) {
+                    String localName = shortForm.substring(splitPoint + 1);
+                    result = Pair.create(prefix, localName);
+                }
+            }
+        }
+
+        return result;
+    }
+
+    @Override
+    public String expand(String prefix, String localName) {
+        String iri = prefixMapping.getNsPrefixURI(prefix);
+        String result = iri != null
+            ? iri + localName
+            : null;
+
+        return result;
+    }
+
+    @Override
+    public boolean isEmpty() {
+        return prefixMapping.hasNoMappings();
+    }
+
+    @Override
+    public int size() {
+        return prefixMapping.numPrefixes();
+    }
+
+    public static PrefixMap wrap(PrefixMapping prefixMapping) {
+        return new PrefixMapAdapter(prefixMapping);
+    }
+}
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalCollection.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalCollection.java
new file mode 100644
index 000000000..e31d8d0ed
--- /dev/null
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalCollection.java
@@ -0,0 +1,10 @@
+package org.aksw.jena_sparql_api.utils.model;
+
+import java.util.Collection;
+
+import org.apache.jena.sparql.core.Transactional;
+
+public interface TransactionalCollection<T>
+    extends Transactional, Collection<T>
+{
+}
\ No newline at end of file
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalCollectionImpl.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalCollectionImpl.java
new file mode 100644
index 000000000..24f5a0fca
--- /dev/null
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalCollectionImpl.java
@@ -0,0 +1,239 @@
+package org.aksw.jena_sparql_api.utils.model;
+
+import static org.apache.jena.query.ReadWrite.WRITE;
+import static org.apache.jena.system.Txn.calculateRead;
+import static org.apache.jena.system.Txn.executeWrite;
+
+import java.util.AbstractCollection;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Function;
+
+import org.apache.jena.query.ReadWrite;
+import org.apache.jena.query.TxnType;
+import org.apache.jena.shared.Lock;
+import org.apache.jena.shared.LockMRPlusSW;
+import org.apache.jena.sparql.JenaTransactionException;
+
+/**
+ * A wrapper for a collection that provides transactions using MR+SW
+ * (multiple readers plus a single writer) locking.
+ *
+ * @author raven
+ *
+ * @param <T>
+ * @param <C>
+ */
+public class TransactionalCollectionImpl<T, C extends Collection<T>>
+    extends AbstractCollection<T>
+    implements TransactionalCollection<T>
+{
+    private Lock transactionLock = new LockMRPlusSW();
+//    private final ReentrantLock systemLock = new ReentrantLock(true);
+
+    protected AtomicReference<C> master = new AtomicReference<>();
+
+    private final AtomicLong generation = new AtomicLong(0) ;
+
+    protected ThreadLocal<TxnState<C>> txnState = ThreadLocal.withInitial(() -> null);
+
+    public TransactionalCollectionImpl(C baseCollection, Function<? super C, ? extends C> cloner) {
+        super();
+        this.master.set(baseCollection);
+        this.cloner = cloner;
+    }
+
+    public static class TxnState<C extends Collection<?>> {
+        public TxnState(TxnType txnType, ReadWrite txnMode, C local, long version) {
+            super();
+            this.txnType = txnType;
+            this.txnMode = txnMode;
+            this.local = local;
+            this.version = version;
+        }
+        TxnType txnType;
+        ReadWrite txnMode;
+        C local;
+        long version;
+    }
+
+    protected Function<? super C, ? extends C> cloner;
+
+
+    @Override
+    public void commit() {
+        TxnState<C> state = txnState.get();
+        master.set(state.local);
+    }
+
+    @Override
+    public void abort() {
+        end();
+    }
+
+    @Override
+    public void end() {
+        txnState.remove();
+        transactionLock.leaveCriticalSection();
+    }
+
+    @Override
+    public void begin(ReadWrite readWrite) {
+        begin(TxnType.convert(readWrite));
+    }
+
+    @Override
+    public void begin(TxnType txnType) {
+        if (isInTransaction()) {
+            throw new JenaTransactionException("Transactions cannot be nested!");
+        }
+
+        ReadWrite txnMode = TxnType.initial(txnType);
+        _begin(txnType, txnMode);
+    }
+
+    private void _begin(TxnType txnType, ReadWrite txnMode) {
+        transactionLock.enterCriticalSection(txnMode.equals(ReadWrite.READ)); // get the dataset write lock, if needed.
+
+        C clone;
+        if (txnMode.equals(ReadWrite.WRITE)) {
+            clone = cloner.apply(master.get());
+        } else {
+            clone = master.get();
+        }
+
+        long version = generation.get();
+
+        txnState.set(new TxnState<>(txnType, txnMode, clone, version));
+
+//        withLock(systemLock, () -> {
+//            version.set(generation.get());
+//        }) ;
+    }
+
+//    public static void withLock(java.util.concurrent.locks.Lock lock, Runnable action) {
+//        lock.lock();
+//        try { action.run(); }
+//        finally {
+//            lock.unlock();
+//        }
+//    }
+
+
+    @Override
+    public boolean promote(Promote promoteMode) {
+        if (!isInTransaction())
+            throw new JenaTransactionException("Tried to promote outside a transaction!");
+        if ( transactionMode().equals(ReadWrite.WRITE) )
+            return true;
+
+        if ( transactionType() == TxnType.READ )
+            return false;
+
+        boolean readCommitted = (promoteMode == Promote.READ_COMMITTED);
+
+        try {
+            _promote(readCommitted);
+            return true;
+        } catch (JenaTransactionException ex) {
+            return false ;
+        }
+    }
+
+    private void _promote(boolean readCommitted) {
+        // Outside lock.
+        if ( ! readCommitted && txnState.get().version != generation.get() ) {
+            // This tests for any committed writers since this transaction started.
+            // This does not catch the case of a currently active writer
+            // that has not gone to commit or abort yet.
+            // The final test is after we obtain the transactionLock.
+            throw new JenaTransactionException("Dataset changed - can't promote") ;
+        }
+
+        // Blocking on other writers.
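+        // Lock.WRITE blocks until any concurrent writer has left its critical
+        // section; under the MR+SW lock, readers may proceed concurrently.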
+        transactionLock.enterCriticalSection(Lock.WRITE);
+
+        TxnState<C> local = txnState.get();
+        // Check again now we are inside the lock.
+        if ( ! readCommitted && local.version != generation.get() ) {
+            // Can't promote - release the lock.
+            transactionLock.leaveCriticalSection();
+            throw new JenaTransactionException("Concurrent writer changed the dataset : can't promote") ;
+        }
+        // We have the lock and we have promoted!
+        local.txnMode = WRITE;
+        _begin(transactionType(), ReadWrite.WRITE) ;
+    }
+
+    @Override
+    public ReadWrite transactionMode() {
+        return txnState.get().txnMode;
+    }
+
+    @Override
+    public TxnType transactionType() {
+        return txnState.get().txnType;
+    }
+
+    @Override
+    public boolean isInTransaction() {
+        return txnState.get() != null;
+    }
+
+    private <X> X access(Function<? super C, X> source) {
+        return isInTransaction()
+            ? source.apply(txnState.get().local)
+            : calculateRead(this, () -> source.apply(txnState.get().local));
+    }
+
+    protected <R> R mutate(Function<? super C, R> action) {
+        @SuppressWarnings("unchecked")
+        R[] result = (R[])new Object[]{null};
+
+        if (isInTransaction()) {
+            if (!transactionMode().equals(WRITE)) {
+                TxnType mode = transactionType();
+                switch (mode) {
+                case WRITE:
+                    break;
+                case READ:
+                    throw new JenaTransactionException("Tried to write inside a READ transaction!");
+                case READ_COMMITTED_PROMOTE:
+                case READ_PROMOTE:
+                    throw new RuntimeException("promotion not implemented");
+//                    boolean readCommitted = (mode == TxnType.READ_COMMITTED_PROMOTE);
+//                    promote(readCommitted);
+                    //break;
+                }
+            }
+
+            result[0] = action.apply(txnState.get().local);
+        } else executeWrite(this, () -> {
+//            System.out.println(version.get());
+            result[0] = action.apply(txnState.get().local);
+        });
+        return result[0];
+    }
+
+
+    @Override
+    public boolean add(T e) {
+        return mutate(c -> c.add(e));
+    }
+
+    @Override
+    public Iterator<T> iterator() {
+        return access(C::iterator);
+    }
+
+    @Override
+    public boolean contains(Object o) {
+        return access(c -> c.contains(o));
+    }
+
+    @Override
+    public int size() {
+        return access(C::size);
+    }
+}
\ No newline at end of file
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalSet.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalSet.java
new file mode 100644
index 000000000..2f7499069
--- /dev/null
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalSet.java
@@ -0,0 +1,8 @@
+package org.aksw.jena_sparql_api.utils.model;
+
+import java.util.Set;
+
+public interface TransactionalSet<T>
+    extends TransactionalCollection<T>, Set<T>
+{
+}
diff --git a/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalSetImpl.java b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalSetImpl.java
new file mode 100644
index 000000000..2250c2feb
--- /dev/null
+++ b/jena-sparql-api-utils/src/main/java/org/aksw/jena_sparql_api/utils/model/TransactionalSetImpl.java
@@ -0,0 +1,18 @@
+package org.aksw.jena_sparql_api.utils.model;
+
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.function.Function;
+
+public class TransactionalSetImpl<T>
+    extends TransactionalCollectionImpl<T, Set<T>>
+    implements TransactionalSet<T>
+{
+    public TransactionalSetImpl() {
+        super(new LinkedHashSet<>(), set -> new LinkedHashSet<>(set));
+    }
+
+    public TransactionalSetImpl(Set<T> baseCollection, Function<? super Set<T>, ? extends Set<T>> cloner) {
+        super(baseCollection, cloner);
+    }
+}
diff --git a/jena-sparql-api-utils/src/test/java/org/aksw/jena_sparql_api/utils/RangeTests.java b/jena-sparql-api-utils/src/test/java/org/aksw/jena_sparql_api/utils/RangeTests.java
new file mode 100644
index 000000000..8c23953f0
--- /dev/null
+++ b/jena-sparql-api-utils/src/test/java/org/aksw/jena_sparql_api/utils/RangeTests.java
@@ -0,0 +1,36 @@
+package org.aksw.jena_sparql_api.utils;
+
+import org.apache.jena.query.Query;
+import org.apache.jena.query.QueryFactory;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.common.collect.Range;
+
+
+public class RangeTests {
+    @Test
+    public void testRangeToLimit() {
+        Assert.assertEquals(1l, QueryUtils.rangeToLimit(Range.closed(5l, 5l)));
+        Assert.assertEquals(0l, QueryUtils.rangeToLimit(Range.closedOpen(5l, 5l)));
+    }
+
+    @Test
+    public void testRangeOfZeroLength() {
+        Query query = QueryFactory.create("SELECT * { ?s ?p ?o } LIMIT 0 OFFSET 5");
+        Range<Long> range = QueryUtils.toRange(query);
+        Assert.assertEquals(Range.closedOpen(5l, 5l), range);
+        //System.out.println(QueryUtils.makeClosedOpen(range, DiscreteDomain.longs()));
+    }
+
+    @Test
+    public void testSubRange() {
+        Query query = QueryFactory.create("SELECT * { ?s ?p ?o } LIMIT 10 OFFSET 5");
+        Range<Long> range = QueryUtils.toRange(query);
+        Assert.assertEquals(Range.closedOpen(5l, 15l), range);
+
+        Range<Long> effectiveRange = QueryUtils.subRange(range, Range.closedOpen(0l, 10l));
+        Assert.assertEquals(Range.closedOpen(5l, 15l), effectiveRange);
+    }
+
+}
diff --git a/jena-sparql-api-utils/src/test/java/org/aksw/jena_sparql_api/utils/model/TestDatasetGraphDiff.java b/jena-sparql-api-utils/src/test/java/org/aksw/jena_sparql_api/utils/model/TestDatasetGraphDiff.java
new file mode 100644
index 000000000..5a4e48fbc
--- /dev/null
+++ b/jena-sparql-api-utils/src/test/java/org/aksw/jena_sparql_api/utils/model/TestDatasetGraphDiff.java
@@ -0,0 +1,55 @@
+package org.aksw.jena_sparql_api.utils.model;
+
+import java.util.Collections;
+
+import org.apache.jena.graph.Node;
+import org.apache.jena.query.DatasetFactory;
+import org.apache.jena.query.ReadWrite;
+import org.apache.jena.riot.RDFDataMgr;
+import org.apache.jena.riot.RDFFormat;
+import org.apache.jena.sparql.core.Quad;
+import org.apache.jena.sparql.graph.GraphFactory;
+import org.apache.jena.sparql.sse.SSE;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+public class TestDatasetGraphDiff {
+
+    @Test
+    public void test() {
+        DatasetGraphDiff dg = new DatasetGraphDiff();
+
+        Node g = SSE.parseNode(":g");
+        Quad q = SSE.parseQuad("(quad :g :s :p :o)");
+        dg.addGraph(g, GraphFactory.createDefaultGraph());
+        dg.add(q);
+
+        Assert.assertEquals(Collections.singleton(g), dg.getAddedGraphs());
+
+        dg.begin(ReadWrite.WRITE);
+
+        dg.materialize();
+
+        //dg.abort();
+        dg.commit();
+
+        dg.removeGraph(g);
+
+        System.err.println("Effective Dataset:");
+        System.err.println("-----------------");
+        RDFDataMgr.write(System.err, DatasetFactory.wrap(dg), RDFFormat.TRIG_BLOCKS);
+
+        System.err.println("Added Quads:");
+        System.err.println("-----------------");
+        RDFDataMgr.write(System.err, DatasetFactory.wrap(dg.getAdded()), RDFFormat.TRIG_BLOCKS);
+
+        System.err.println("Removed Quads:");
+        System.err.println("-----------------");
+        RDFDataMgr.write(System.err, DatasetFactory.wrap(dg.getRemoved()), RDFFormat.TRIG_BLOCKS);
+
+        System.err.println("Added Graphs: " + dg.getAddedGraphs());
+        System.err.println("Removed Graphs: " + dg.getRemovedGraphs());
+
+    }
+}
diff --git
a/jena-sparql-api-views/pom.xml b/jena-sparql-api-views/pom.xml
index 96dcd57f8..a9af7d2da 100644
--- a/jena-sparql-api-views/pom.xml
+++ b/jena-sparql-api-views/pom.xml
@@ -10,7 +10,7 @@
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
diff --git a/jena-sparql-api-virtuoso/pom.xml b/jena-sparql-api-virtuoso/pom.xml
index 52c572364..624548d9c 100644
--- a/jena-sparql-api-virtuoso/pom.xml
+++ b/jena-sparql-api-virtuoso/pom.xml
@@ -12,7 +12,7 @@
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
diff --git a/jena-sparql-api-vocabs/pom.xml b/jena-sparql-api-vocabs/pom.xml
index de28e5350..61ad24de0 100644
--- a/jena-sparql-api-vocabs/pom.xml
+++ b/jena-sparql-api-vocabs/pom.xml
@@ -8,7 +8,7 @@
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
diff --git a/jena-sparql-api-web/pom.xml b/jena-sparql-api-web/pom.xml
index 1ee330177..d59b35856 100644
--- a/jena-sparql-api-web/pom.xml
+++ b/jena-sparql-api-web/pom.xml
@@ -6,7 +6,7 @@
 		<groupId>org.aksw.jena-sparql-api</groupId>
 		<artifactId>jena-sparql-api-parent</artifactId>
-		<version>3.16.0-1</version>
+		<version>3.16.0-2</version>
 	<artifactId>jena-sparql-api-web</artifactId>
diff --git a/pom.xml b/pom.xml
index 019c58971..f27dd7884 100644
--- a/pom.xml
+++ b/pom.xml
@@ -8,7 +8,7 @@
 	<artifactId>jena-sparql-api-parent</artifactId>
 	<packaging>pom</packaging>
-	<version>3.16.0-1</version>
+	<version>3.16.0-2</version>
 	<name>Jena SPARQL API - Parent</name>
 	<description>An RDF library for abstracting SPARQL queries.</description>
 	<url>https://github.com/SmartDataAnalytics/jena-sparql-api</url>
@@ -122,7 +122,7 @@
 		4.3.23.RELEASE
 		4.0.0.RELEASE
-		0.8.14
+		0.8.15
@@ -643,14 +643,21 @@
+
+			<dependency>
+				<groupId>org.apache.jena</groupId>
+				<artifactId>jena-dboe-storage</artifactId>
+				<version>${jena.version}</version>
+			</dependency>
-
-		<dependency>
-			<groupId>commons-codec</groupId>
-			<artifactId>commons-codec</artifactId>
-			<version>1.14</version>
-		</dependency>
+
+			<dependency>
+				<groupId>commons-codec</groupId>
+				<artifactId>commons-codec</artifactId>
+				<version>1.14</version>
+			</dependency>
 				<groupId>org.apache.jena</groupId>
@@ -1171,7 +1178,7 @@
 			<groupId>io.reactivex.rxjava3</groupId>
 			<artifactId>rxjava</artifactId>
-			<version>3.0.4</version>
+			<version>3.0.6</version>
@@ -1325,18 +1332,9 @@
 				true
-
+