Skip to content

Commit

Permalink
CBL-6671: Options of FTS partial index is not fully encoded in the SQ… (
Browse files Browse the repository at this point in the history
#2220)

* CBL-6671: Options of FTS partial index is not fully encoded in the SQLite table when it is registered

1. Bumped Schema version.
2. In the new version, added a column, whereClause, to table indexes.
3. Upgrade the tables in previous version to include the new column
4. IndexSpec::whereClause is registered in table "indexes" by the new column.
5. c4index_getOptions will have "where" passed in with C4IndexOptions.
6. Tests of the above.
  • Loading branch information
jianminzhao authored Jan 31, 2025
1 parent fcad0a2 commit 2140bfb
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 32 deletions.
17 changes: 11 additions & 6 deletions C/c4Index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,14 @@ struct C4IndexImpl final : public C4Index {
slice getExpression() const noexcept { return _spec.expression; }

bool getOptions(C4IndexOptions& opts) const noexcept {
opts = {};
opts = {};
bool hasOptions = false;
if ( auto ftsOpts = _spec.ftsOptions() ) {
opts.language = ftsOpts->language;
opts.ignoreDiacritics = ftsOpts->ignoreDiacritics;
opts.disableStemming = ftsOpts->disableStemming;
opts.stopWords = ftsOpts->stopWords;
return true;
hasOptions = true;

#ifdef COUCHBASE_ENTERPRISE
} else if ( auto vecOpts = _spec.vectorOptions() ) {
Expand Down Expand Up @@ -87,14 +88,18 @@ struct C4IndexImpl final : public C4Index {
if ( vecOpts->minTrainingCount ) opts.vector.minTrainingSize = unsigned(*vecOpts->minTrainingCount);
if ( vecOpts->maxTrainingCount ) opts.vector.maxTrainingSize = unsigned(*vecOpts->maxTrainingCount);
opts.vector.lazy = vecOpts->lazyEmbedding;
return true;
hasOptions = true;
#endif
} else if ( auto arrOpts = _spec.arrayOptions() ) {
opts.unnestPath = (const char*)arrOpts->unnestPath.buf;
return true;
} else {
return false;
hasOptions = true;
}

if ( !_spec.whereClause.empty() ) {
opts.where = (char*)_spec.whereClause.buf;
hasOptions = true;
}
return hasOptions;
}

#ifdef COUCHBASE_ENTERPRISE
Expand Down
27 changes: 27 additions & 0 deletions C/tests/c4DatabaseTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,33 @@ TEST_CASE("Database Upgrade from 3.1 with peerCheckpoints table", "[Database][Up
CHECK(db);
}

TEST_CASE("Database Upgrade to WithIndexesWhereColumn", "[Database][Upgrade][C]") {
// This db has table "indexes". This test ensures that the column whereClause is
// successfully added to the table.
string dbPath = "upgrade_2.8_index.cblite2";

C4DatabaseFlags withFlags{0};
C4Log("---- Opening copy of db %s with flags 0x%x", dbPath.c_str(), withFlags);
C4DatabaseConfig2 config = {slice(TempDir()), withFlags};
auto name = C4Test::copyFixtureDB(kVersionedFixturesSubDir + dbPath);
C4Log("---- copy Fixture to: %s/%s", TempDir().c_str(), name.asString().c_str());
C4Error err;
c4::ref<C4Database> db = REQUIRED(c4db_openNamed(name, &config, WITH_ERROR(&err)));

auto defaultColl = REQUIRED(c4db_getDefaultCollection(db, nullptr));
C4IndexOptions options{};
options.where = "gender = 'female'";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("length"), c4str("length(name.first)"), kC4N1QLQuery, kC4ValueIndex,
&options, WITH_ERROR(&err)));

// Check we can get the where clause back via c4index_getOptions
auto index = REQUIRED(c4coll_getIndex(defaultColl, C4STR("length"), nullptr));
C4IndexOptions outOptions;
REQUIRE(c4index_getOptions(index, &outOptions));
CHECK(string(outOptions.where) == options.where);
c4index_release(index);
}

static void setRemoteRev(C4Database* db, slice docID, slice revID, C4RemoteID remote) {
auto defaultColl = c4db_getDefaultCollection(db, nullptr);
C4Document* doc = c4coll_getDoc(defaultColl, docID, true, kDocGetAll, ERROR_INFO());
Expand Down
83 changes: 70 additions & 13 deletions C/tests/c4QueryTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -286,32 +286,29 @@ N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query expression index", "[Query][C]") {
}

N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query partial value index", "[Query][C]") {
C4Error err;
auto defaultColl = getCollection(db, kC4DefaultCollectionSpec);
C4Error err;
auto defaultColl = getCollection(db, kC4DefaultCollectionSpec);
const vector<string> result{"0000099"};
for ( int withIndex = 0; withIndex < 2; ++withIndex ) {
if ( withIndex ) {
C4IndexOptions options{};
options.where = "gender = 'female'";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("length"), c4str("length(name.first)"), kC4N1QLQuery,
kC4ValueIndex, &options, WITH_ERROR(&err)));
}
c4query_release(query);
const char* queryStr = "SELECT META().id FROM _ WHERE length(name.first) = 9 AND gender = 'female'";
query = c4query_new2(db, kC4N1QLQuery, c4str(queryStr), nullptr, ERROR_INFO(err));
compileSelect("SELECT META().id FROM _ WHERE length(name.first) = 9 AND gender = 'female'", kC4N1QLQuery);
REQUIRE(query);
checkExplanation(withIndex);
CHECK(run() == (vector<string>{"0000099"}));
CHECK(run() == result);

if ( withIndex ) {
c4query_release(query);
// Logically equivalent query, changing gender = 'female' to gender != 'male'.
// Because the condition is not exact as the condition in partial index, it would not use
// the index.
const char* queryStr2 = "SELECT META().id FROM _ WHERE length(name.first) = 9 AND gender != 'male'";
query = c4query_new2(db, kC4N1QLQuery, c4str(queryStr2), nullptr, ERROR_INFO(err));
compileSelect("SELECT META().id FROM _ WHERE length(name.first) = 9 AND gender != 'male'", kC4N1QLQuery);
REQUIRE(query);
checkExplanation(!withIndex);
CHECK(run() == (vector<string>{"0000099"}));
CHECK(run() == result);
}
}
}
Expand Down Expand Up @@ -447,9 +444,18 @@ N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query dict literal", "[Query][C]") {
N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query FTS", "[Query][C][FTS]") {
C4Error err;
auto defaultColl = getCollection(db, kC4DefaultCollectionSpec);
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("[[\".contact.address.street\"]]"), kC4JSONQuery,
kC4FullTextIndex, nullptr, WITH_ERROR(&err)));
compile(json5("['MATCH()', 'byStreet', 'Hwy']"));

bool useJSON = GENERATE(true, false);
if ( useJSON ) {
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("[[\".contact.address.street\"]]"),
kC4JSONQuery, kC4FullTextIndex, nullptr, WITH_ERROR(&err)));
compile(json5("['MATCH()', 'byStreet', 'Hwy']"));
} else {
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("contact.address.street"), kC4N1QLQuery,
kC4FullTextIndex, nullptr, WITH_ERROR(&err)));
compileSelect("SELECT META().id FROM _ WHERE MATCH(byStreet, 'Hwy')", kC4N1QLQuery);
}

auto results = runFTS();
CHECK(results
== (vector<vector<C4FullTextMatch>>{{{13, 0, 0, 10, 3}},
Expand All @@ -464,6 +470,57 @@ N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query FTS", "[Query][C][FTS]") {
c4slice_free(matched);
}

N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query FTS (Partial)", "[Query][C][FTS]") {
C4Error err;
auto defaultColl = getCollection(db, kC4DefaultCollectionSpec);

// c.f. above test, "C4Query FTS". The query is the same.
// W/o "where" in options, there are five contacts whose street addresses contain "Hwy".

bool useJSON = GENERATE(false, true);

C4IndexOptions options{};
// With the following "where" option, the query picks 2 Californians.
if ( useJSON ) {
options.where = R"(["=", [".contact.address.state"], "CA"])";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("[[\".contact.address.street\"]]"),
kC4JSONQuery, kC4FullTextIndex, &options, WITH_ERROR(&err)));
compile(json5("['MATCH()', 'byStreet', 'Hwy']"));
} else {
options.where = "contact.address.state = 'CA'";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("contact.address.street"), kC4N1QLQuery,
kC4FullTextIndex, &options, WITH_ERROR(&err)));
compileSelect("SELECT META().id FROM _ WHERE MATCH(byStreet, 'Hwy')", kC4N1QLQuery);
}

auto results = runFTS();
CHECK(results == (vector<vector<C4FullTextMatch>>{{{15, 0, 0, 11, 3}}, {{43, 0, 0, 12, 3}}}));

{
// Check we can get the where clause back via c4index_getOptions
auto index = REQUIRED(c4coll_getIndex(defaultColl, C4STR("byStreet"), nullptr));
C4IndexOptions outOptions;
REQUIRE(c4index_getOptions(index, &outOptions));
CHECK(string(outOptions.where) == options.where);
c4index_release(index);
}

// By creating the index with different options.where, the original index will be deleted,
// and the index table will be based soly on the new where clause. We get one Texan.
if ( useJSON ) {
options.where = R"(["=", [".contact.address.state"], "TX"])";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("[[\".contact.address.street\"]]"),
kC4JSONQuery, kC4FullTextIndex, &options, WITH_ERROR(&err)));
} else {
options.where = "contact.address.state = 'TX'";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("contact.address.street"), kC4N1QLQuery,
kC4FullTextIndex, &options, WITH_ERROR(&err)));
}

results = runFTS();
CHECK(results == (vector<vector<C4FullTextMatch>>{{{44, 0, 0, 12, 3}}}));
}

N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query FTS multiple properties", "[Query][C][FTS]") {
C4Error err;
REQUIRE(c4db_createIndex(
Expand Down
4 changes: 2 additions & 2 deletions C/tests/c4QueryTest.hh
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ class C4QueryTest : public C4Test {

~C4QueryTest() { c4query_release(query); }

void compileSelect(const std::string& queryStr) {
void compileSelect(const std::string& queryStr, C4QueryLanguage language = kC4JSONQuery) {
INFO("Query = " << queryStr);
C4Error error{};
c4query_release(query);
query = c4query_new2(db, kC4JSONQuery, c4str(queryStr.c_str()), nullptr, ERROR_INFO(error));
query = c4query_new2(db, language, c4str(queryStr.c_str()), nullptr, ERROR_INFO(error));
REQUIRE(query);
}

Expand Down
10 changes: 8 additions & 2 deletions LiteCore/Query/IndexSpec.hh
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ namespace litecore {
IndexSpec(std::string name_, Type type_, string_view expression_, string_view whereClause_ = {},
QueryLanguage queryLanguage = QueryLanguage::kJSON, Options options_ = {})
: IndexSpec(name_, type_, alloc_slice(expression_), queryLanguage, options_) {
const_cast<alloc_slice&>(this->whereClause) = whereClause_;
setWhereClause(whereClause_);
}

IndexSpec(const IndexSpec&) = delete;
Expand All @@ -112,13 +112,19 @@ namespace litecore {
/** The optional WHERE clause: the condition for a partial index */
const fleece::impl::Array* where() const;

void setWhereClause(string_view whereClause_) {
if ( !whereClause_.empty() ) whereClause = alloc_slice::nullPaddedString(whereClause_);
else
whereClause.reset();
}

/** The nested unnestPath from arrayOptions, as separated by "[]." is turned to an array. */
const fleece::impl::Array* unnestPaths() const;

std::string const name; ///< Name of index
Type const type; ///< Type of index
alloc_slice const expression; ///< The query expression
alloc_slice const whereClause; ///< The where clause. If given, expression should be the what clause
alloc_slice whereClause; ///< The where clause. If given, expression should be the what clause
QueryLanguage queryLanguage; ///< Is expression JSON or N1QL?
Options const options; ///< Options for FTS and vector indexes

Expand Down
25 changes: 21 additions & 4 deletions LiteCore/Query/SQLiteDataFile+Indexes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace litecore {
"type INTEGER NOT NULL, " // C4IndexType
"keyStore TEXT NOT NULL, " // Name of the KeyStore it indexes
"expression TEXT, " // Indexed property expression (JSON or N1QL)
"whereClause TEXT, " // Property whereClause for partial index (JSON or N1QL)
"indexTableName TEXT, " // Index's SQLite name
"lastSeq TEXT)"); // indexed sequences, for lazy indexes, else null
ensureSchemaVersionAtLeast(SchemaVersion::WithIndexTable);
Expand All @@ -64,18 +65,23 @@ namespace litecore {

void SQLiteDataFile::registerIndex(const litecore::IndexSpec& spec, const string& keyStoreName,
const string& indexTableName) {
SQLite::Statement stmt(*this, "INSERT INTO indexes (name, type, keyStore, expression, indexTableName) "
"VALUES (?, ?, ?, ?, ?)");
SQLite::Statement stmt(*this,
"INSERT INTO indexes (name, type, keyStore, expression, indexTableName, whereClause) "
"VALUES (?, ?, ?, ?, ?, ?)");
// CBL-6000 adding prefix to distinguish between JSON and N1QL expression
string prefixedExpression{spec.queryLanguage == QueryLanguage::kJSON ? "=j"
: spec.queryLanguage == QueryLanguage::kN1QL ? "=n"
: ""};
// whereClause must be in the same queryLanguage as expression.

prefixedExpression += spec.expression.asString();
stmt.bindNoCopy(1, spec.name);
stmt.bind(2, spec.type);
stmt.bindNoCopy(3, keyStoreName);
stmt.bindNoCopy(4, prefixedExpression.c_str(), (int)prefixedExpression.length());
if ( spec.type != IndexSpec::kValue ) stmt.bindNoCopy(5, indexTableName);
if ( !spec.whereClause.empty() ) stmt.bindNoCopy(6, (char*)spec.whereClause.buf, (int)spec.whereClause.size);

LogStatement(stmt);
stmt.exec();
}
Expand All @@ -99,6 +105,16 @@ namespace litecore {
case IndexSpec::kFullText:
case IndexSpec::kVector:
same = schemaExistsWithSQL(indexTableName, "table", indexTableName, indexSQL);
if ( spec.type == IndexSpec::kFullText ) {
if ( same ) {
auto whatA = spec.what(), whatB = existingSpec->what();
same = whatA ? whatA->isEqual(whatB) : !whatB;
}
if ( same ) {
auto whereA = spec.where(), whereB = existingSpec->where();
same = whereA ? whereA->isEqual(whereB) : !whereB;
}
}
break;
case IndexSpec::kArray:
same = schemaExistsWithSQL(spec.name, "index", hexName(indexTableName), indexSQL);
Expand Down Expand Up @@ -194,7 +210,7 @@ namespace litecore {

vector<SQLiteIndexSpec> SQLiteDataFile::getIndexes(const KeyStore* store) const {
if ( indexTableExists() ) {
string sql = "SELECT name, type, expression, keyStore, indexTableName, lastSeq "
string sql = "SELECT name, type, expression, keyStore, indexTableName, lastSeq, whereClause "
"FROM indexes ORDER BY name";
if ( _schemaVersion < SchemaVersion::WithIndexesLastSeq ) {
// If schema doesn't have the `lastSeq` column, don't query it:
Expand Down Expand Up @@ -251,7 +267,7 @@ namespace litecore {
// Gets info of a single index. (Subroutine of create/deleteIndex.)
optional<SQLiteIndexSpec> SQLiteDataFile::getIndex(slice name) {
if ( !indexTableExists() ) return nullopt;
string sql = "SELECT name, type, expression, keyStore, indexTableName, lastSeq "
string sql = "SELECT name, type, expression, keyStore, indexTableName, lastSeq, whereClause "
"FROM indexes WHERE name=?";
if ( _schemaVersion < SchemaVersion::WithIndexesLastSeq ) {
// If schema doesn't have the `lastSeq` column, don't query it:
Expand Down Expand Up @@ -320,6 +336,7 @@ namespace litecore {

SQLiteIndexSpec spec{name, type, expression, queryLanguage, options, keyStoreName, indexTableName};
if ( auto col5 = stmt.getColumn(5); col5.isText() ) spec.indexedSequences = col5.getText();
if ( auto col6 = stmt.getColumn(6); col6.isText() ) spec.setWhereClause({col6.getText()});
return spec;
}

Expand Down
10 changes: 10 additions & 0 deletions LiteCore/Storage/SQLiteDataFile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,16 @@ namespace litecore {
}
}
});

(void)upgradeSchema(SchemaVersion::WithIndexesWhereColumn, "Adding indexes.whereClause column", [&] {
string sql;
if ( getSchema("indexes", "table", "indexes", sql) ) {
// Check if the table needs to be updated to add the 'lastSeq' column: (v3.2)
if ( sql.find("whereClause") == string::npos ) {
_exec("ALTER TABLE indexes ADD COLUMN whereClause TEXT");
}
}
});
});

// Configure number of extra threads to be used by SQLite:
Expand Down
11 changes: 6 additions & 5 deletions LiteCore/Storage/SQLiteDataFile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,13 @@ namespace litecore {

WithNewDocs = 400, // New document/revision storage (CBL 3.0)

WithDeletedTable = 500, // Added 'deleted' KeyStore for deleted docs (CBL 3.0?)
WithIndexesLastSeq = 501, // Added 'lastSeq' column to 'indexes' table (CBL 3.2)
WithExpirationColumn = 502, // Added 'expiration' column to KeyStore
MaxReadable = 599, // Cannot open versions newer than this
WithDeletedTable = 500, // Added 'deleted' KeyStore for deleted docs (CBL 3.0?)
WithIndexesLastSeq = 501, // Added 'lastSeq' column to 'indexes' table (CBL 3.2)
WithExpirationColumn = 502, // Added 'expiration' column to KeyStore
WithIndexesWhereColumn = 503, // Added 'whereClause' column to the "indexes" table
MaxReadable = 599, // Cannot open versions newer than this

Current = WithExpirationColumn
Current = WithIndexesWhereColumn
};

void reopenSQLiteHandle();
Expand Down

0 comments on commit 2140bfb

Please sign in to comment.