From bd619559ac8859091623484e954ff58b1adcd84f Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Fri, 23 Jul 2021 14:05:38 +0200 Subject: [PATCH] WIP around problems with Range Index efficiency --- .../exist/collections/CollectionConfigurationManager.java | 2 +- .../main/java/org/exist/collections/MutableCollection.java | 7 ++++--- .../main/java/org/exist/xquery/value/ValueSequence.java | 2 +- .../java/org/exist/indexing/range/RangeIndexConfig.java | 6 ++++-- .../exist/xquery/modules/range/OptimizeFieldPragma.java | 6 +++++- .../org/exist/xquery/modules/range/RangeQueryRewriter.java | 4 ++-- 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/exist-core/src/main/java/org/exist/collections/CollectionConfigurationManager.java b/exist-core/src/main/java/org/exist/collections/CollectionConfigurationManager.java index 45038de6d3a..c02be330af0 100644 --- a/exist-core/src/main/java/org/exist/collections/CollectionConfigurationManager.java +++ b/exist-core/src/main/java/org/exist/collections/CollectionConfigurationManager.java @@ -221,7 +221,7 @@ protected CollectionConfiguration getConfiguration(final Collection collection) } // use default configuration - return defaultConfig; + return defaultConfig; // TODO(AR) this can result in a lot of the same configs being added to lists and maps } } diff --git a/exist-core/src/main/java/org/exist/collections/MutableCollection.java b/exist-core/src/main/java/org/exist/collections/MutableCollection.java index d44a6a01b33..ed2c15f3645 100644 --- a/exist-core/src/main/java/org/exist/collections/MutableCollection.java +++ b/exist-core/src/main/java/org/exist/collections/MutableCollection.java @@ -1970,11 +1970,12 @@ private void releaseReader(final DBBroker broker, final XMLReader reader) { public IndexSpec getIndexConfiguration(final DBBroker broker) { final CollectionConfiguration conf = getConfiguration(broker); //If the collection has its own config... - if (conf == null) { - return broker.getIndexConfiguration(); + if (conf != null) { + return conf.getIndexConfiguration(); } //... otherwise return the general config (the broker's one) - return conf.getIndexConfiguration(); + // TODO(AR) what is the difference between this and CollectionConfigurationManagement#defaultConfig + return broker.getIndexConfiguration(); // TODO(AR) this can result in a lot of the same object being added to lists and maps } @Override diff --git a/exist-core/src/main/java/org/exist/xquery/value/ValueSequence.java b/exist-core/src/main/java/org/exist/xquery/value/ValueSequence.java index bda28ec6058..fe5789b5c17 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/ValueSequence.java +++ b/exist-core/src/main/java/org/exist/xquery/value/ValueSequence.java @@ -879,7 +879,7 @@ public int compare(final Item o1, final Item o2) { private class CollectionIterator implements Iterator { private Collection nextCollection = null; private int pos = 0; - +//TODO(AR) can this return the same collection multiple times? should it? CollectionIterator() { next(); } diff --git a/extensions/indexes/range/src/main/java/org/exist/indexing/range/RangeIndexConfig.java b/extensions/indexes/range/src/main/java/org/exist/indexing/range/RangeIndexConfig.java index 910f3ea4564..84b44badc00 100644 --- a/extensions/indexes/range/src/main/java/org/exist/indexing/range/RangeIndexConfig.java +++ b/extensions/indexes/range/src/main/java/org/exist/indexing/range/RangeIndexConfig.java @@ -48,11 +48,13 @@ public class RangeIndexConfig { private static final Logger LOG = LogManager.getLogger(RangeIndexConfig.class); - private Map paths = new TreeMap<>(); + // treemap is used to preserve order + private final Map paths = new TreeMap<>(); private Analyzer analyzer; - private PathIterator iterator = new PathIterator(); + // TODO(AR) should start as null + private final PathIterator iterator = new PathIterator(); public RangeIndexConfig() { // default analyzer diff --git a/extensions/indexes/range/src/main/java/org/exist/xquery/modules/range/OptimizeFieldPragma.java b/extensions/indexes/range/src/main/java/org/exist/xquery/modules/range/OptimizeFieldPragma.java index 2e4ac2a2e3f..2ef81fb91ef 100644 --- a/extensions/indexes/range/src/main/java/org/exist/xquery/modules/range/OptimizeFieldPragma.java +++ b/extensions/indexes/range/src/main/java/org/exist/xquery/modules/range/OptimizeFieldPragma.java @@ -95,7 +95,9 @@ private Expression tryRewriteToFields(LocationStep locationStep, final Predicate // without context path, we cannot rewrite the entire query if (contextPath != null) { final List notOptimizable = new ArrayList<>(preds.length); - List configs = getConfigurations(contextSequence); + + // TODO(AR) this list contains many duplicates, as the contextSequence can contain nodes from the same collection + List configs = getConfigurations(contextSequence); //TODO(AR) this is returning one config per-collection in the db, yet we may know the first call (locationStep.parent) is fn:collection - so this could be much more efficient.. // walk through the predicates attached to the current location step // check if expression can be optimized @@ -292,6 +294,8 @@ private List findConfigurations(NodePath path, L return rices; } + // TODO(AR) is this returning too many configs? -- 296 are returned for benjamins + // TODO(AR) how are these arranged, a list does not seem efficient private List getConfigurations(Sequence contextSequence) { List configs = new ArrayList<>(); for (final Iterator i = contextSequence.getCollectionIterator(); i.hasNext(); ) { diff --git a/extensions/indexes/range/src/main/java/org/exist/xquery/modules/range/RangeQueryRewriter.java b/extensions/indexes/range/src/main/java/org/exist/xquery/modules/range/RangeQueryRewriter.java index 827f97e2e8d..d7a77794de9 100644 --- a/extensions/indexes/range/src/main/java/org/exist/xquery/modules/range/RangeQueryRewriter.java +++ b/extensions/indexes/range/src/main/java/org/exist/xquery/modules/range/RangeQueryRewriter.java @@ -63,7 +63,7 @@ public Pragma rewriteLocationStep(final LocationStep locationStep) throws XPathE NodePath contextPath = toNodePath(getPrecedingSteps(locationStep)); // process the remaining predicates for (Predicate pred : preds) { - if (pred.getLength() != 1) { + if (pred.getSubExpressionCount() != 1) { // can only optimize predicates with one expression break; } @@ -127,7 +127,7 @@ public Pragma rewriteLocationStep(final LocationStep locationStep) throws XPathE } protected static Lookup rewrite(Expression expression, NodePath path) throws XPathException { - ArrayList eqArgs = new ArrayList<>(2); + final List eqArgs = new ArrayList<>(2); if (expression instanceof GeneralComparison) { GeneralComparison comparison = (GeneralComparison) expression; eqArgs.add(comparison.getLeft());