From 8bc4194377bcd803df5642d05550d3b2af76c92f Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Wed, 18 Sep 2024 13:31:51 -0700 Subject: [PATCH] QueryTranslator: Implemented predictive index support --- .../Query/SQLiteKeyStore+PredictiveIndexes.cc | 25 +++-- LiteCore/Query/Translator/ExprNodes.cc | 2 + LiteCore/Query/Translator/IndexedNodes.cc | 105 ++++++++++++------ LiteCore/Query/Translator/IndexedNodes.hh | 28 +++-- LiteCore/Query/Translator/Node.cc | 3 +- LiteCore/Query/Translator/Node.hh | 27 ++++- LiteCore/Query/Translator/NodesToSQL.cc | 2 +- LiteCore/Query/Translator/QueryTranslator.cc | 74 ++++++++---- LiteCore/Query/Translator/QueryTranslator.hh | 6 +- LiteCore/Query/Translator/SelectNodes.cc | 38 ++++--- LiteCore/Query/Translator/TranslatorTables.hh | 2 + LiteCore/Query/Translator/TranslatorUtils.cc | 8 +- LiteCore/Query/Translator/TranslatorUtils.hh | 1 + LiteCore/tests/PredictiveQueryTest.cc | 9 +- LiteCore/tests/QueryTranslatorTest.cc | 56 ++++++---- 15 files changed, 262 insertions(+), 124 deletions(-) diff --git a/LiteCore/Query/SQLiteKeyStore+PredictiveIndexes.cc b/LiteCore/Query/SQLiteKeyStore+PredictiveIndexes.cc index 8eff2b20d..3a77f2a47 100644 --- a/LiteCore/Query/SQLiteKeyStore+PredictiveIndexes.cc +++ b/LiteCore/Query/SQLiteKeyStore+PredictiveIndexes.cc @@ -61,7 +61,7 @@ namespace litecore { // Derive the table name from the expression (path) it unnests: auto kvTableName = tableName(); auto q_kvTableName = quotedTableName(); - QueryTranslator qp(db(), "", kvTableName); + QueryTranslator qp(db(), string(kDefaultCollectionName), kvTableName); auto predTableName = qp.predictiveTableName((FLValue)expression); // Create the index table, unless an identical one already exists: @@ -75,23 +75,30 @@ namespace litecore { if ( !db().schemaExistsWithSQL(predTableName, "table", predTableName, sql) ) { LogTo(QueryLog, "Creating predictive table '%s' on %s", predTableName.c_str(), expression->toJSONString().c_str()); + // Capture the SQL of the `predict(...)` call, _before_ creating the table. + // (If we created the table first, the query translator would generate SQL that used it!) + string predictExpr = qp.expressionSQL((FLValue)expression); + qp.setBodyColumnName("new.body"); + string triggerPredictExpr = qp.expressionSQL((FLValue)expression); + + // Create the index-table: + LogTo(QueryLog, "Creating predictive index table: %s", sql.c_str()); db().exec(sql); // Populate the index-table with data from existing documents: - string predictExpr = qp.expressionSQL((FLValue)expression); - db().exec(CONCAT("INSERT INTO " << sqlIdentifier(predTableName) - << " (docid, body) " - "SELECT rowid, " - << predictExpr << "FROM " << q_kvTableName << " WHERE (flags & 1) = 0")); + sql = CONCAT("INSERT INTO " << sqlIdentifier(predTableName) + << " (docid, body) " + "SELECT rowid, " + << predictExpr << "FROM " << q_kvTableName << " as _doc WHERE (flags & 1) = 0"); + LogTo(QueryLog, "Populating predictive index table: %s", sql.c_str()); + db().exec(sql); // Set up triggers to keep the index-table up to date // ...on insertion: - qp.setBodyColumnName("new.body"); - predictExpr = qp.expressionSQL((FLValue)expression); string insertTriggerExpr = CONCAT("INSERT INTO " << sqlIdentifier(predTableName) << " (docid, body) " "VALUES (new.rowid, " - << predictExpr << ")"); + << triggerPredictExpr << ")"); createTrigger(predTableName, "ins", "AFTER INSERT", "WHEN (new.flags & 1) = 0", insertTriggerExpr); // ...on delete: diff --git a/LiteCore/Query/Translator/ExprNodes.cc b/LiteCore/Query/Translator/ExprNodes.cc index dd4dba281..d82cb6af7 100644 --- a/LiteCore/Query/Translator/ExprNodes.cc +++ b/LiteCore/Query/Translator/ExprNodes.cc @@ -126,6 +126,8 @@ namespace litecore::qt { #ifdef COUCHBASE_ENTERPRISE case OpType::vectorDistance: return new (ctx) VectorDistanceNode(operands, ctx); + case OpType::prediction: + return PredictionNode::parse(operands, ctx); #endif default: // A normal OpNode diff --git a/LiteCore/Query/Translator/IndexedNodes.cc b/LiteCore/Query/Translator/IndexedNodes.cc index 4d4feb935..f6e483f54 100644 --- a/LiteCore/Query/Translator/IndexedNodes.cc +++ b/LiteCore/Query/Translator/IndexedNodes.cc @@ -24,7 +24,25 @@ namespace litecore::qt { using namespace fleece; // indexed by IndexType: - constexpr const char* kOwnerFnName[2] = {"MATCH", "APPROX_VECTOR_DISTANCE"}; + constexpr const char* kIndexTypeName[3] = {"FTS", "vector", "predictive"}; + constexpr const char* kOwnerFnName[3] = {"MATCH", "APPROX_VECTOR_DISTANCE", "PREDICTION"}; + + void IndexedNode::setIndexedExpression(ExprNode* expression) { + _indexedExpr = expression; + expression->visitTree([&](Node& n, unsigned /*depth*/) { + if ( SourceNode* nodeSource = n.source() ) { + require(_sourceCollection == nullptr || _sourceCollection == nodeSource, + "1st argument to %s may only refer to a single collection", kOwnerFnName[int(_type)]); + _sourceCollection = nodeSource; + } + }); + require(_sourceCollection, "unknown source collection for %s()", kOwnerFnName[int(_type)]); + } + + void IndexedNode::writeSourceTable(SQLWriter& ctx, string_view tableName) const { + require(!tableName.empty(), "missing %s index", kIndexTypeName[int(_type)]); + ctx << sqlIdentifier(tableName); + } #pragma mark - FTS: @@ -38,13 +56,8 @@ namespace litecore::qt { require(source, "unknown source collection for %s()", name); require(source->isCollection(), "invalid source collection for %s()", name); require(path.count() > 0, "missing property after collection alias in %s()", name); - _sourceCollection = source; - _indexExpressionJSON = string(path.toString()); - } - - void FTSNode::writeSourceTable(SQLWriter& ctx, string_view tableName) const { - require(!tableName.empty(), "missing FTS index"); - ctx << sqlIdentifier(tableName); + _sourceCollection = source; + _indexID = ctx.newString(path.toString()); } void FTSNode::writeIndex(SQLWriter& sql) const { @@ -71,11 +84,10 @@ namespace litecore::qt { ctx << "))"; } -#pragma mark - VECTOR: - - #ifdef COUCHBASE_ENTERPRISE +# pragma mark - VECTOR: + // A SQLite vector MATCH expression; used by VectorDistanceNode to add a join condition. class VectorMatchNode final : public ExprNode { public: @@ -90,19 +102,9 @@ namespace litecore::qt { ExprNode* _vector; }; - VectorDistanceNode::VectorDistanceNode(Array::iterator& args, ParseContext& ctx) - : IndexedNode(IndexType::vector), _indexedExpr(parse(args[0], ctx)) { + VectorDistanceNode::VectorDistanceNode(Array::iterator& args, ParseContext& ctx) : IndexedNode(IndexType::vector) { // Determine which collection the vector is based on: - SourceNode* source = nullptr; - _indexedExpr->visitTree([&](Node& n, unsigned /*depth*/) { - if ( SourceNode* nodeSource = n.source() ) { - require(source == nullptr || source == nodeSource, - "1st argument (vector) to APPROX_VECTOR_DISTANCE may only refer to a single collection"); - source = nodeSource; - } - }); - require(source, "unknown source collection for APPROX_VECTOR_DISTANCE()"); - _sourceCollection = source; + setIndexedExpression(ExprNode::parse(args[0], ctx)); // Create the JSON expression used to locate the index: string indexExpr(args[0].toJSON(false, true)); @@ -118,7 +120,7 @@ namespace litecore::qt { replace(indexExpr, "[\"." + prefix + ".", "[\"."); } } - _indexExpressionJSON = ctx.newString(indexExpr); + _indexID = ctx.newString(indexExpr); _vector = ExprNode::parse(args[1], ctx); @@ -183,8 +185,7 @@ namespace litecore::qt { } void VectorDistanceNode::writeSourceTable(SQLWriter& sql, string_view tableName) const { - require(!tableName.empty(), "missing vector index"); - if ( _simple ) { + if ( _simple && !tableName.empty() ) { // In a "simple" vector match, run the vector query as a nested SELECT: sql << "(SELECT docid, distance FROM " << sqlIdentifier(tableName) << " WHERE vector MATCH encode_vector(" << _vector << ")"; @@ -193,7 +194,7 @@ namespace litecore::qt { require(limit, "a LIMIT must be given when using APPROX_VECTOR_DISTANCE()"); sql << " LIMIT " << limit << ")"; } else { - sql << sqlIdentifier(tableName); + IndexedNode::writeSourceTable(sql, tableName); } } @@ -203,6 +204,49 @@ namespace litecore::qt { ctx << sqlIdentifier(_indexSource->alias()) << ".distance"; } +# pragma mark - PREDICTION: + + ExprNode* PredictionNode::parse(Array::iterator args, ParseContext& ctx) { + // Unlike a vector or FTS query, a prediction() is not required to have an index. + // Check whether one exists. Unfortunately, the index identifier is based on the entire + // expression array including the first item `PREDICTION()` which isn't in the iterator, + // so we have to reconstruct it: + auto expr = MutableArray::newArray(); + expr.append("PREDICTION()"); + expr.append(args[0]); + expr.append(args[1]); + string id = expressionIdentifier(expr); + + if ( ctx.delegate.hasPredictiveIndex(id) ) { + return new (ctx) PredictionNode(args, ctx, id); + } else { + return FunctionNode::parse(kPredictionFnName, args, ctx); + } + } + + PredictionNode::PredictionNode(Array::iterator& args, ParseContext& ctx, string_view indexID) + : IndexedNode(IndexType::prediction) { + _indexID = ctx.newString(indexID); + setIndexedExpression(ExprNode::parse(args[1], ctx)); + if ( args.count() > 2 ) { + slice pathStr = requiredString(args[2], "property path of PREDICTION()"); + KeyPath path = parsePath(pathStr); + require(path.count() > 0, "invalid property path in PREDICTION()"); + _subProperty = ctx.newString(path.toString()); + } + } + + void PredictionNode::writeSQL(SQLWriter& out) const { + auto alias = sqlIdentifier(_indexSource->alias()); + if ( _subProperty ) { + out << kUnnestedValueFnName << "(" << alias << ".body, " << sqlString(_subProperty); + out << ")"; + } else { + out << kRootFnName << "(" << alias << ".body)"; + } + } + + #endif @@ -221,15 +265,14 @@ namespace litecore::qt { } bool IndexSourceNode::matchesNode(const IndexedNode* node) const { - return _indexedNode->indexType() == node->indexType() - && _indexedNode->indexExpressionJSON() == node->indexExpressionJSON() + return _indexedNode->indexType() == node->indexType() && _indexedNode->indexID() == node->indexID() && collection() == node->sourceCollection()->collection() && scope() == node->sourceCollection()->scope(); } IndexType IndexSourceNode::indexType() const { return _indexedNode->indexType(); } - string_view IndexSourceNode::indexedExpressionJSON() const { return _indexedNode->indexExpressionJSON(); } + string_view IndexSourceNode::indexID() const { return _indexedNode->indexID(); } void IndexSourceNode::addIndexedNode(IndexedNode* node) { Assert(node != _indexedNode && node->indexType() == _indexedNode->indexType()); @@ -300,7 +343,7 @@ namespace litecore::qt { /// Adds a SourceNode for an IndexedNode, or finds an existing one. /// Sets the source as its indexSource. void SelectNode::addIndexForNode(IndexedNode* node, ParseContext& ctx) { - DebugAssert(!node->indexExpressionJSON().empty()); + DebugAssert(!node->indexID().empty()); // Look for an existing index source: IndexSourceNode* indexSrc = nullptr; diff --git a/LiteCore/Query/Translator/IndexedNodes.hh b/LiteCore/Query/Translator/IndexedNodes.hh index 5a60ba098..798873909 100644 --- a/LiteCore/Query/Translator/IndexedNodes.hh +++ b/LiteCore/Query/Translator/IndexedNodes.hh @@ -29,8 +29,8 @@ namespace litecore::qt { public: IndexType indexType() const { return _type; } - /// JSON of the indexed expression, usually a property - string_view indexExpressionJSON() const { return _indexExpressionJSON; } + /// A unique identifier of the indexed expression, used to match it with an IndexSourceNode. + string_view indexID() const { return _indexID; } /// The collection being searched. SourceNode* C4NULLABLE sourceCollection() const { return _sourceCollection; } @@ -49,13 +49,16 @@ namespace litecore::qt { bool isAuxiliary() const { return _isAuxiliary; } /// Writes SQL for the index table name (or SELECT expression) - virtual void writeSourceTable(SQLWriter& ctx, string_view tableName) const = 0; + virtual void writeSourceTable(SQLWriter& ctx, string_view tableName) const; protected: IndexedNode(IndexType type) : _type(type) {} + void setIndexedExpression(ExprNode*); + IndexType const _type; // Index type - string _indexExpressionJSON; // Expression/property that's indexed, as JSON + ExprNode* _indexedExpr; // The indexed expression (usually a doc property) + string_view _indexID; // Expression/property that's indexed SourceNode* C4NULLABLE _sourceCollection{}; // The collection being queried IndexSourceNode* C4NULLABLE _indexSource{}; // Source representing the index SelectNode* C4NULLABLE _select{}; // The containing SELECT statement @@ -67,7 +70,6 @@ namespace litecore::qt { protected: FTSNode(Array::iterator& args, ParseContext&, const char* name); - void writeSourceTable(SQLWriter& ctx, string_view tableName) const override; void writeIndex(SQLWriter&) const; }; @@ -111,13 +113,25 @@ namespace litecore::qt { void writeSQL(SQLWriter&) const override; private: - ExprNode* _indexedExpr; // The indexed expression (usually a doc property) ExprNode* _vector; // The vector being queried int _metric; // Distance metric (actually vectorsearch::Metric) unsigned _numProbes = 0; // Number of probes, or 0 for default bool _simple = true; // True if this is a simple (non-hybrid) query }; + /** A `prediction()` function call that uses an index. */ + class PredictionNode final : public IndexedNode { + public: + static ExprNode* parse(Array::iterator args, ParseContext&); + + void writeSQL(SQLWriter&) const override; + + private: + PredictionNode(Array::iterator& args, ParseContext& ctx, string_view indexID); + + const char* _subProperty{}; + }; + #endif #pragma mark - INDEX SOURCE: @@ -128,7 +142,7 @@ namespace litecore::qt { explicit IndexSourceNode(IndexedNode*, string_view alias, ParseContext& ctx); IndexType indexType() const; - string_view indexedExpressionJSON() const; + string_view indexID() const; bool matchesNode(IndexedNode const*) const; diff --git a/LiteCore/Query/Translator/Node.cc b/LiteCore/Query/Translator/Node.cc index 51157a6e3..07f3e2781 100644 --- a/LiteCore/Query/Translator/Node.cc +++ b/LiteCore/Query/Translator/Node.cc @@ -27,7 +27,8 @@ namespace litecore::qt { // Typical queries only allocate a few KB, not enough to fill a single chunk. static constexpr size_t kArenaChunkSize = 4000; - RootContext::RootContext() : Arena(kArenaChunkSize), ParseContext(*static_cast(this)) {} + RootContext::RootContext() + : Arena(kArenaChunkSize), ParseContext(*static_cast(this), *static_cast(this)) {} void* Node::operator new(size_t size, ParseContext& ctx) noexcept { return ctx.arena.alloc(size, alignof(Node)); } diff --git a/LiteCore/Query/Translator/Node.hh b/LiteCore/Query/Translator/Node.hh index 7d56e98b4..793f01177 100644 --- a/LiteCore/Query/Translator/Node.hh +++ b/LiteCore/Query/Translator/Node.hh @@ -21,6 +21,7 @@ #include #include #include +#include C4_ASSUME_NONNULL_BEGIN @@ -63,16 +64,32 @@ namespace litecore::qt { }; /** Types of indexes. */ - enum class IndexType { FTS, vector }; + enum class IndexType { + FTS, +#ifdef COUCHBASE_ENTERPRISE + vector, + prediction, +#endif + }; #pragma mark - PARSE CONTEXT: + struct ParseDelegate { +#ifdef COUCHBASE_ENTERPRISE + std::function hasPredictiveIndex; +#endif + }; + /** State used during parsing, passed down through the recursive descent. */ struct ParseContext { - ParseContext(Arena<>& a) : arena(a) {} + ParseContext(ParseDelegate& d, Arena<>& a) : delegate(d), arena(a) {} + + // not a copy constructor! Creates a new child context. + explicit ParseContext(ParseContext& parent) : delegate(parent.delegate), arena(parent.arena){}; - ParseContext(ParseContext const& parent) : arena(parent.arena){}; + ParseContext(ParseContext&&) = default; + ParseDelegate& delegate; Arena<>& arena; // The arena allocator SelectNode* C4NULLABLE select{}; // The enclosing SELECT, if any std::unordered_map aliases; // All of the sources & named results @@ -87,8 +104,10 @@ namespace litecore::qt { /** Top-level Context that provides an Arena, and destructs all Nodes in its destructor. */ struct RootContext : Arena<> + , public ParseDelegate , public ParseContext { - RootContext(); + explicit RootContext(); + RootContext(RootContext&&) = default; }; #pragma mark - NODE CLASS: diff --git a/LiteCore/Query/Translator/NodesToSQL.cc b/LiteCore/Query/Translator/NodesToSQL.cc index 6d80c4cb8..b7a607a21 100644 --- a/LiteCore/Query/Translator/NodesToSQL.cc +++ b/LiteCore/Query/Translator/NodesToSQL.cc @@ -67,7 +67,7 @@ namespace litecore::qt { void MetaNode::writeSQL(SQLWriter& ctx) const { string aliasDot; - if ( _source ) aliasDot = CONCAT(sqlIdentifier(_source->alias()) << "."); + if ( _source && !_source->alias().empty() ) aliasDot = CONCAT(sqlIdentifier(_source->alias()) << "."); writeMetaSQL(aliasDot, _property, ctx); } diff --git a/LiteCore/Query/Translator/QueryTranslator.cc b/LiteCore/Query/Translator/QueryTranslator.cc index c0d891d6f..6928754bb 100644 --- a/LiteCore/Query/Translator/QueryTranslator.cc +++ b/LiteCore/Query/Translator/QueryTranslator.cc @@ -35,8 +35,20 @@ namespace litecore { QueryTranslator::~QueryTranslator() = default; + RootContext QueryTranslator::makeRootContext() const { + RootContext root; +#ifdef COUCHBASE_ENTERPRISE + root.hasPredictiveIndex = [&](string_view id) -> bool { + string indexTable = _delegate.predictiveTableName(_defaultTableName, string(id)); + return _delegate.tableExists(indexTable); + }; +#endif + return root; + } + void QueryTranslator::parse(FLValue v) { - RootContext ctx; + RootContext ctx = makeRootContext(); + // Parse the query into a Node tree: SelectNode* query = new (ctx) SelectNode(v, ctx); query->postprocess(ctx); @@ -79,16 +91,16 @@ namespace litecore { } string QueryTranslator::expressionSQL(FLValue exprSource) { - RootContext ctx; - auto expr = ExprNode::parse(exprSource, ctx); - expr->postprocess(ctx); + RootContext ctx = makeRootContext(); + auto selectNode = new (ctx) SelectNode(exprSource, ctx); + selectNode->postprocess(ctx); // Set the SQLite table name for each SourceNode: - expr->visitTree([&](Node& node, unsigned /*depth*/) { + selectNode->visitTree([&](Node& node, unsigned /*depth*/) { if ( auto source = dynamic_cast(&node) ) assignTableNameToSource(source, ctx); }); - return writeSQL([&](SQLWriter& writer) { writer << *expr; }); + return writeSQL([&](SQLWriter& writer) { writer << *selectNode->where(); }); } void QueryTranslator::assignTableNameToSource(SourceNode* source, ParseContext& ctx) { @@ -122,15 +134,26 @@ namespace litecore { fail("no such collection \"%s\"", name.c_str()); if ( auto index = dynamic_cast(source) ) { - if ( index->indexType() == IndexType::FTS ) { - tableName = _delegate.FTSTableName(tableName, string(index->indexedExpressionJSON())); - _ftsTables.push_back(tableName); - } else if ( index->indexType() == IndexType::vector ) { + switch ( index->indexType() ) { + case IndexType::FTS: + tableName = _delegate.FTSTableName(tableName, string(index->indexID())); + _ftsTables.push_back(tableName); + break; #ifdef COUCHBASE_ENTERPRISE - auto vecSource = dynamic_cast(index->indexedNode()); - Assert(vecSource); - tableName = _delegate.vectorTableName(tableName, string(vecSource->indexExpressionJSON()), - vecSource->metric()); + case IndexType::vector: + { + auto vecSource = dynamic_cast(index->indexedNode()); + Assert(vecSource); + tableName = _delegate.vectorTableName(tableName, string(vecSource->indexID()), + vecSource->metric()); + break; + } + case IndexType::prediction: + { + auto predSource = index->indexedNode(); + tableName = _delegate.predictiveTableName(tableName, string(predSource->indexID())); + break; + } #endif } } else if ( source->isCollection() ) { @@ -147,7 +170,7 @@ namespace litecore { FLArrayIterator& whatExpressions, FLArray whereClause, bool isUnnestedTable) { _sql = writeSQL([&](SQLWriter& writer) { - RootContext ctx; + RootContext ctx = makeRootContext(); SourceNode* source; if ( isUnnestedTable ) { @@ -188,7 +211,7 @@ namespace litecore { string QueryTranslator::whereClauseSQL(FLValue exprSource, string_view dbAlias) { if ( !exprSource ) return ""; - RootContext ctx; + RootContext ctx = makeRootContext(); auto src = new (ctx) SourceNode(ctx.newString(dbAlias)); ctx.from = src; auto expr = ExprNode::parse(exprSource, ctx); @@ -197,7 +220,7 @@ namespace litecore { } string QueryTranslator::functionCallSQL(slice fnName, FLValue arg, FLValue param) { - RootContext ctx; + RootContext ctx = makeRootContext(); auto argExpr = ExprNode::parse(arg, ctx); argExpr->postprocess(ctx); ExprNode* paramExpr = nullptr; @@ -230,7 +253,7 @@ namespace litecore { } string QueryTranslator::unnestedTableName(FLValue flExpr) const { - RootContext ctx; + RootContext ctx = makeRootContext(); auto expr = ExprNode::parse(flExpr, ctx); expr->postprocess(ctx); @@ -244,7 +267,7 @@ namespace litecore { } string QueryTranslator::eachExpressionSQL(FLValue flExpr) { - RootContext ctx; + RootContext ctx = makeRootContext(); auto expr = ExprNode::parse(flExpr, ctx); auto prop = dynamic_cast(expr); @@ -253,5 +276,16 @@ namespace litecore { return writeSQL([&prop](SQLWriter& sql) { prop->writeSQL(sql); }); } - string QueryTranslator::predictiveTableName(FLValue) const { error::_throw(error::Unimplemented); } + string QueryTranslator::predictiveIdentifier(FLValue expression) const { + auto array = Value(expression).asArray(); + if ( array.count() < 2 || !array[0].asString().caseEquivalent("PREDICTION()") ) + fail("Invalid PREDICTION() call"); + return expressionIdentifier(array, 3); // ignore the output-property parameter + } + +#ifdef COUCHBASE_ENTERPRISE + string QueryTranslator::predictiveTableName(FLValue expression) const { + return _delegate.predictiveTableName(_defaultTableName, predictiveIdentifier(expression)); + } +#endif } // namespace litecore diff --git a/LiteCore/Query/Translator/QueryTranslator.hh b/LiteCore/Query/Translator/QueryTranslator.hh index 5f7ee3d32..20e799da0 100644 --- a/LiteCore/Query/Translator/QueryTranslator.hh +++ b/LiteCore/Query/Translator/QueryTranslator.hh @@ -23,6 +23,7 @@ namespace litecore { namespace qt { class Node; struct ParseContext; + struct RootContext; class SourceNode; class SQLWriter; } // namespace qt @@ -117,8 +118,9 @@ namespace litecore { string eachExpressionSQL(FLValue); string unnestedTableName(FLValue key) const; +#ifdef COUCHBASE_ENTERPRISE string predictiveTableName(FLValue) const; - +#endif private: QueryTranslator(const QueryTranslator& qp) = delete; QueryTranslator& operator=(const QueryTranslator&) = delete; @@ -126,6 +128,8 @@ namespace litecore { void assignTableNameToSource(qt::SourceNode*, qt::ParseContext&); string writeSQL(function_ref); string functionCallSQL(slice fnName, FLValue arg, FLValue C4NULLABLE param = nullptr); + string predictiveIdentifier(FLValue expression) const; + qt::RootContext makeRootContext() const; const Delegate& _delegate; // delegate object (SQLiteKeyStore) string _defaultTableName; // Name of the default table to use diff --git a/LiteCore/Query/Translator/SelectNodes.cc b/LiteCore/Query/Translator/SelectNodes.cc index 2f8223d84..2256f4856 100644 --- a/LiteCore/Query/Translator/SelectNodes.cc +++ b/LiteCore/Query/Translator/SelectNodes.cc @@ -303,6 +303,7 @@ namespace litecore::qt { } } + bool implicitWhere = false; if ( select ) { // Parse FROM first, because it creates the SourceNodes that affect parsing of properties: if ( Value from = getCaseInsensitive(select, "FROM") ) { @@ -375,7 +376,8 @@ namespace litecore::qt { } else { // If not given a Dict or ["SELECT",...], assume it's a WHERE clause: - addSource(new (ctx) SourceNode("_doc"), ctx); + implicitWhere = true; + addSource(new (ctx) SourceNode(""), ctx); setChild(_where, ExprNode::parse(v, ctx)); } @@ -406,23 +408,25 @@ namespace litecore::qt { // Locate FTS and vector indexed expressions and add corresponding SourceNodes: addIndexes(ctx); - for ( SourceNode* source : _sources ) { - if ( !source->_usesDeleted && source->_collection.empty() && source->isCollection() ) { - // The default collection may contain deleted documents in its main table, - // so if the query didn't ask for deleted docs, add a condition to the WHERE - // or ON clause that only passes live docs: - auto m = new (ctx) MetaNode(MetaProperty::_notDeleted, source); - ExprNode*& cond = source->isJoin() ? source->_joinOn : _where; - if ( cond ) { - cond->setParent(nullptr); - auto a = new (ctx) OpNode(*lookupOp("AND", 2)); - a->addArg(cond); - a->addArg(m); - cond = a; - } else { - cond = m; + if ( !implicitWhere ) { + for ( SourceNode* source : _sources ) { + if ( !source->_usesDeleted && source->_collection.empty() && source->isCollection() ) { + // The default collection may contain deleted documents in its main table, + // so if the query didn't ask for deleted docs, add a condition to the WHERE + // or ON clause that only passes live docs: + auto m = new (ctx) MetaNode(MetaProperty::_notDeleted, source); + ExprNode*& cond = source->isJoin() ? source->_joinOn : _where; + if ( cond ) { + cond->setParent(nullptr); + auto a = new (ctx) OpNode(*lookupOp("AND", 2)); + a->addArg(cond); + a->addArg(m); + cond = a; + } else { + cond = m; + } + cond->setParent(source->isJoin() ? (Node*)source : (Node*)this); } - cond->setParent(source->isJoin() ? (Node*)source : (Node*)this); } } diff --git a/LiteCore/Query/Translator/TranslatorTables.hh b/LiteCore/Query/Translator/TranslatorTables.hh index 3815c2004..98d198396 100644 --- a/LiteCore/Query/Translator/TranslatorTables.hh +++ b/LiteCore/Query/Translator/TranslatorTables.hh @@ -85,6 +85,7 @@ namespace litecore::qt { rank, #ifdef COUCHBASE_ENTERPRISE vectorDistance, + prediction, #endif }; @@ -160,6 +161,7 @@ namespace litecore::qt { #ifdef COUCHBASE_ENTERPRISE {"APPROX_VECTOR_DISTANCE()", 2, 5, kFnPrecedence, OpType::vectorDistance}, + {"PREDICTION()", 2, 3, kFnPrecedence, OpType::prediction}, #endif }; diff --git a/LiteCore/Query/Translator/TranslatorUtils.cc b/LiteCore/Query/Translator/TranslatorUtils.cc index ec95632d3..069f6ac61 100644 --- a/LiteCore/Query/Translator/TranslatorUtils.cc +++ b/LiteCore/Query/Translator/TranslatorUtils.cc @@ -197,14 +197,18 @@ namespace litecore::qt { } } - string expressionIdentifier(Array expression, unsigned maxItems) { + string expressionIdentifier(Array::iterator i, unsigned maxItems) { SHA1Builder sha; unsigned item = 0; - for ( Array::iterator i(expression); i; ++i ) { + for ( ; i; ++i ) { if ( maxItems > 0 && ++item > maxItems ) break; sha << i.value().toJSON(false, true); } return sha.finish().asBase64(); } + string expressionIdentifier(Array expression, unsigned maxItems) { + return expressionIdentifier(Array::iterator(expression), maxItems); + } + } // namespace litecore::qt diff --git a/LiteCore/Query/Translator/TranslatorUtils.hh b/LiteCore/Query/Translator/TranslatorUtils.hh index 36244b028..521bcda02 100644 --- a/LiteCore/Query/Translator/TranslatorUtils.hh +++ b/LiteCore/Query/Translator/TranslatorUtils.hh @@ -81,6 +81,7 @@ namespace litecore::qt { // Constructs a unique identifier of an expression, from a digest of its JSON. string expressionIdentifier(Array expression, unsigned maxItems = 0); + string expressionIdentifier(Array::iterator expression, unsigned maxItems = 0); } // namespace litecore::qt diff --git a/LiteCore/tests/PredictiveQueryTest.cc b/LiteCore/tests/PredictiveQueryTest.cc index 2ce0d2ca0..6911d1999 100644 --- a/LiteCore/tests/PredictiveQueryTest.cc +++ b/LiteCore/tests/PredictiveQueryTest.cc @@ -16,8 +16,6 @@ #ifdef COUCHBASE_ENTERPRISE -# define SKIP_PREDICTIVE_INDEX //TODO: Add support? - using namespace std; using namespace fleece; using namespace fleece::impl; @@ -114,9 +112,7 @@ N_WAY_TEST_CASE_METHOD(QueryTest, "Predictive Query invalid input", "[Query][Pre PredictiveModel::unregister("8ball"); } -# ifndef SKIP_PREDICTIVE_INDEX - -N_WAY_TEST_CASE_METHOD(QueryTest, "Create/Delete Predictive Index", "[Query][Predict]") { +N_WAY_TEST_CASE_METHOD(QueryTest, "Create and Delete Predictive Index", "[Query][Predict]") { Retained model = new EightBall(db.get()); model->registerAs("8ball"); @@ -162,6 +158,7 @@ N_WAY_TEST_CASE_METHOD(QueryTest, "Predictive Query indexed", "[Query][Predict]" Log("Explanation: %s", explanation.c_str()); if ( pass > 1 ) { + INFO("Explanation: " << explanation); CHECK(explanation.find("prediction(") == string::npos); CHECK(explanation.find("USING INDEX nums") != string::npos); } @@ -280,6 +277,4 @@ N_WAY_TEST_CASE_METHOD(QueryTest, "Predictive Query cached only", "[Query][Predi PredictiveModel::unregister("8ball"); } -# endif // SKIP_PREDICTIVE_INDEX - #endif // COUCHBASE_ENTERPRISE diff --git a/LiteCore/tests/QueryTranslatorTest.cc b/LiteCore/tests/QueryTranslatorTest.cc index 17448d747..f2f0f583a 100644 --- a/LiteCore/tests/QueryTranslatorTest.cc +++ b/LiteCore/tests/QueryTranslatorTest.cc @@ -344,26 +344,26 @@ TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator SELECT", "[Query][QueryTr } TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator SELECT WHAT", "[Query][QueryTranslator]") { - CHECK_equal(parseWhere("['SELECT', {WHAT: ['._id'], WHERE: ['=', ['.', 'last'], 'Smith']}]"), + CHECK_equal(parse("{WHAT: ['._id'], WHERE: ['=', ['.', 'last'], 'Smith']}"), "SELECT _doc.key FROM kv_default AS _doc WHERE fl_value(_doc.body, 'last') = 'Smith' AND " "(_doc.flags & 1 = 0)"); - CHECK_equal(parseWhere("['SELECT', {WHAT: [['.first']],\ - WHERE: ['=', ['.', 'last'], 'Smith']}]"), + CHECK_equal(parse("{WHAT: [['.first']],\ + WHERE: ['=', ['.', 'last'], 'Smith']}"), "SELECT fl_result(fl_value(_doc.body, 'first')) FROM kv_default AS _doc WHERE fl_value(_doc.body, " "'last') = 'Smith' AND (_doc.flags & 1 = 0)"); - CHECK_equal(parseWhere("['SELECT', {WHAT: [['.first'], ['length()', ['.middle']]],\ - WHERE: ['=', ['.', 'last'], 'Smith']}]"), + CHECK_equal(parse("{WHAT: [['.first'], ['length()', ['.middle']]],\ + WHERE: ['=', ['.', 'last'], 'Smith']}"), "SELECT fl_result(fl_value(_doc.body, 'first')), N1QL_length(fl_value(_doc.body, 'middle')) " "FROM kv_default AS _doc WHERE fl_value(_doc.body, 'last') = 'Smith' AND (_doc.flags & 1 = 0)"); - CHECK_equal(parseWhere("['SELECT', {WHAT: [['.first'], ['AS', ['length()', ['.middle']], 'mid']],\ - WHERE: ['=', ['.', 'last'], 'Smith']}]"), + CHECK_equal(parse("{WHAT: [['.first'], ['AS', ['length()', ['.middle']], 'mid']],\ + WHERE: ['=', ['.', 'last'], 'Smith']}"), "SELECT fl_result(fl_value(_doc.body, 'first')), N1QL_length(fl_value(_doc.body, 'middle')) AS " "mid FROM kv_default AS _doc WHERE fl_value(_doc.body, 'last') = 'Smith' AND (_doc.flags & 1 = 0)"); // Check the "." operator (like SQL "*"): - CHECK_equal(parseWhere("['SELECT', {WHAT: ['.'], WHERE: ['=', ['.', 'last'], 'Smith']}]"), + CHECK_equal(parse("{WHAT: ['.'], WHERE: ['=', ['.', 'last'], 'Smith']}"), "SELECT fl_result(fl_root(_doc.body)) FROM kv_default AS _doc WHERE fl_value(_doc.body, 'last') = " "'Smith' AND (_doc.flags & 1 = 0)"); - CHECK_equal(parseWhere("['SELECT', {WHAT: [['.']], WHERE: ['=', ['.', 'last'], 'Smith']}]"), + CHECK_equal(parse("{WHAT: [['.']], WHERE: ['=', ['.', 'last'], 'Smith']}"), "SELECT fl_result(fl_root(_doc.body)) FROM kv_default AS _doc WHERE fl_value(_doc.body, 'last') = " "'Smith' AND (_doc.flags & 1 = 0)"); } @@ -460,24 +460,24 @@ TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator Join", "[Query][QueryTran TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator SELECT UNNEST", "[Query][QueryTranslator][Unnest]") { CHECK_equal( - parseWhere("['SELECT', {\ + parse("{\ FROM: [{as: 'book'}, \ {as: 'notes', 'unnest': ['.book.notes']}],\ - WHERE: ['=', ['.notes'], 'torn']}]"), + WHERE: ['=', ['.notes'], 'torn']}"), "SELECT book.key, book.sequence FROM kv_default AS book JOIN fl_each(book.body, 'notes') AS notes WHERE " "notes.value = 'torn' AND (book.flags & 1 = 0)"); - CHECK_equal(parseWhere("['SELECT', {\ + CHECK_equal(parse("{\ WHAT: ['.notes'], \ FROM: [{as: 'book'}, \ {as: 'notes', 'unnest': ['.book.notes']}],\ - WHERE: ['>', ['.notes.page'], 100]}]"), + WHERE: ['>', ['.notes.page'], 100]}"), "SELECT fl_result(notes.value) FROM kv_default AS book JOIN fl_each(book.body, 'notes') AS notes WHERE " "fl_nested_value(notes.body, 'page') > 100 AND (book.flags & 1 = 0)"); - CHECK_equal(parseWhere("['SELECT', {\ + CHECK_equal(parse("{\ WHAT: ['.notes'], \ FROM: [{as: 'book'}, \ {as: 'notes', 'unnest': ['pi()']}],\ - WHERE: ['>', ['.notes.page'], 100]}]"), + WHERE: ['>', ['.notes.page'], 100]}"), "SELECT fl_result(notes.value) FROM kv_default AS book JOIN fl_each(pi()) AS notes WHERE " "fl_nested_value(notes.body, 'page') > 100 AND (book.flags & 1 = 0)"); } @@ -485,18 +485,18 @@ TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator SELECT UNNEST", "[Query][ TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator SELECT UNNEST optimized", "[Query][QueryTranslator][Unnest]") { tableNames.insert("kv_default:unnest:notes"); - CHECK_equal(parseWhere("['SELECT', {\ + CHECK_equal(parse("{\ FROM: [{as: 'book'}, \ {as: 'notes', 'unnest': ['.book.notes']}],\ - WHERE: ['=', ['.notes'], 'torn']}]"), + WHERE: ['=', ['.notes'], 'torn']}"), "SELECT book.key, book.sequence FROM kv_default AS book JOIN \"kv_default:unnest:notes\" AS notes ON " "notes.docid=book.rowid WHERE fl_unnested_value(notes.body) = 'torn' AND (book.flags & 1 = 0)"); CHECK_equal( - parseWhere("['SELECT', {\ + parse("{\ WHAT: ['.notes'], \ FROM: [{as: 'book'}, \ {as: 'notes', 'unnest': ['.book.notes']}],\ - WHERE: ['>', ['.notes.page'], 100]}]"), + WHERE: ['>', ['.notes.page'], 100]}"), "SELECT fl_result(fl_unnested_value(notes.body)) FROM kv_default AS book JOIN \"kv_default:unnest:notes\" " "AS notes ON notes.docid=book.rowid WHERE fl_unnested_value(notes.body, 'page') > 100 AND (book.flags & " "1 = 0)"); @@ -504,15 +504,14 @@ TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator SELECT UNNEST optimized", TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator SELECT UNNEST with collections", "[Query][QueryTranslator][Unnest]") { - string str = "['SELECT', {\ - WHAT: ['.notes'], \ + string str = "{ WHAT: ['.notes'], \ FROM: [{as: 'library'}, \ {collection: 'books', as: 'book', 'on': ['=', ['.book.library'], ['.library._id']]}, \ {as: 'notes', 'unnest': ['.book.notes']}],\ - WHERE: ['>', ['.notes.page'], 100]}]"; + WHERE: ['>', ['.notes.page'], 100]}"; // Non-default collection gets unnested: tableNames.insert("kv_.books"); - CHECK_equal(parseWhere(str), + CHECK_equal(parse(str), "SELECT fl_result(notes.value) FROM kv_default AS library INNER JOIN \"kv_.books\" AS book ON " "fl_value(book.body, 'library') = library.key JOIN fl_each(book.body, 'notes') AS notes WHERE " "fl_nested_value(notes.body, 'page') > 100 AND (library.flags & 1 = 0)"); @@ -520,7 +519,7 @@ TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator SELECT UNNEST with collec // Same, but optimized: tableNames.insert("kv_.books:unnest:notes"); CHECK_equal( - parseWhere(str), + parse(str), "SELECT fl_result(fl_unnested_value(notes.body)) FROM kv_default AS library INNER JOIN \"kv_.books\" AS " "book ON fl_value(book.body, 'library') = library.key JOIN \"kv_.books:unnest:notes\" AS notes ON " "notes.docid=book.rowid WHERE fl_unnested_value(notes.body, 'page') > 100 AND (library.flags & 1 = " @@ -773,6 +772,15 @@ TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator Buried FTS", "[Query][Que } #ifdef COUCHBASE_ENTERPRISE + +TEST_CASE_METHOD(QueryTranslatorTest, "Predictive Index ID", "[Query][QueryTranslator][Predict]") { + // It's important that the mapping from PREDICT expressions to table names doesn't change, + // or it will make existing indexes in existing databases useless. + QueryTranslator t(*this, "_default", "kv_default"); + auto doc = Doc::fromJSON(R"-(["PREDICTION()", "8ball", {"number": [".num"]}])-"); + CHECK(t.predictiveTableName(doc.asArray()) == R"(kv_default:predict:0\M\W\K\Sbbzr0gn4\V\V\Vu\Ks\N\E9s\Z\E8o=)"); +} + TEST_CASE_METHOD(QueryTranslatorTest, "QueryTranslator Vector Search", "[Query][QueryTranslator][VectorSearch]") { tableNames.insert("kv_default:vector:vecIndex"); vectorIndexedProperties.insert({{"kv_default", R"([".vector"])"}, "kv_default:vector:vecIndex"});