Skip to content

Commit

Permalink
CBL-6671: Options of FTS partial index is not fully encoded in the SQ…
Browse files Browse the repository at this point in the history
…Lite table when it is registered

1. Bumped Schema version.
2. In the new version, added a column, whereClause, to table indexes.
3. Upgrade the tables in previous version to include the new column
4. IndexSpec::whereClause is registered in table "indexes" by the new column.
5. c4index_getOptions will have "where" passed in with C4IndexOptions.
6. Tests of the above.
  • Loading branch information
jianminzhao committed Jan 30, 2025
1 parent 2e88f00 commit a083d7e
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 37 deletions.
17 changes: 11 additions & 6 deletions C/c4Index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,14 @@ struct C4IndexImpl final : public C4Index {
slice getExpression() const noexcept { return _spec.expression; }

bool getOptions(C4IndexOptions& opts) const noexcept {
opts = {};
opts = {};
bool hasOptions = false;
if ( auto ftsOpts = _spec.ftsOptions() ) {
opts.language = ftsOpts->language;
opts.ignoreDiacritics = ftsOpts->ignoreDiacritics;
opts.disableStemming = ftsOpts->disableStemming;
opts.stopWords = ftsOpts->stopWords;
return true;
hasOptions = true;

#ifdef COUCHBASE_ENTERPRISE
} else if ( auto vecOpts = _spec.vectorOptions() ) {
Expand Down Expand Up @@ -87,14 +88,18 @@ struct C4IndexImpl final : public C4Index {
if ( vecOpts->minTrainingCount ) opts.vector.minTrainingSize = unsigned(*vecOpts->minTrainingCount);
if ( vecOpts->maxTrainingCount ) opts.vector.maxTrainingSize = unsigned(*vecOpts->maxTrainingCount);
opts.vector.lazy = vecOpts->lazyEmbedding;
return true;
hasOptions = true;
#endif
} else if ( auto arrOpts = _spec.arrayOptions() ) {
opts.unnestPath = (const char*)arrOpts->unnestPath.buf;
return true;
} else {
return false;
hasOptions = true;
}

if ( !_spec.whereClause.empty() ) {
opts.where = (char*)_spec.whereClause.buf;
hasOptions = true;
}
return hasOptions;
}

#ifdef COUCHBASE_ENTERPRISE
Expand Down
27 changes: 27 additions & 0 deletions C/tests/c4DatabaseTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1369,6 +1369,33 @@ TEST_CASE("Database Upgrade From 2.8 with Index", "[Database][Upgrade][C]") {
}
}

TEST_CASE("Database Upgrade to WithIndexesWhereColumn", "[Database][Upgrade][C]") {
// This db has table "indexes". This test ensures that the column whereClause is
// successfully added to the table.
string dbPath = "upgrade_2.8_index.cblite2";

C4DatabaseFlags withFlags{0};
C4Log("---- Opening copy of db %s with flags 0x%x", dbPath.c_str(), withFlags);
C4DatabaseConfig2 config = {slice(TempDir()), withFlags};
auto name = C4Test::copyFixtureDB(kVersionedFixturesSubDir + dbPath);
C4Log("---- copy Fixture to: %s/%s", TempDir().c_str(), name.asString().c_str());
C4Error err;
c4::ref<C4Database> db = REQUIRED(c4db_openNamed(name, &config, WITH_ERROR(&err)));

auto defaultColl = REQUIRED(c4db_getDefaultCollection(db, nullptr));
C4IndexOptions options{};
options.where = "gender = 'female'";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("length"), c4str("length(name.first)"), kC4N1QLQuery, kC4ValueIndex,
&options, WITH_ERROR(&err)));

// Check we can get the where clause back via c4index_getOptions
auto index = REQUIRED(c4coll_getIndex(defaultColl, C4STR("length"), nullptr));
C4IndexOptions outOptions;
REQUIRE(c4index_getOptions(index, &outOptions));
CHECK(string(outOptions.where) == options.where);
c4index_release(index);
}

static void setRemoteRev(C4Database* db, slice docID, slice revID, C4RemoteID remote) {
auto defaultColl = c4db_getDefaultCollection(db, nullptr);
C4Document* doc = c4coll_getDoc(defaultColl, docID, true, kDocGetAll, ERROR_INFO());
Expand Down
67 changes: 54 additions & 13 deletions C/tests/c4QueryTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -286,32 +286,29 @@ N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query expression index", "[Query][C]") {
}

N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query partial value index", "[Query][C]") {
C4Error err;
auto defaultColl = getCollection(db, kC4DefaultCollectionSpec);
C4Error err;
auto defaultColl = getCollection(db, kC4DefaultCollectionSpec);
const vector<string> result{"0000099"};
for ( int withIndex = 0; withIndex < 2; ++withIndex ) {
if ( withIndex ) {
C4IndexOptions options{};
options.where = "gender = 'female'";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("length"), c4str("length(name.first)"), kC4N1QLQuery,
kC4ValueIndex, &options, WITH_ERROR(&err)));
}
c4query_release(query);
const char* queryStr = "SELECT META().id FROM _ WHERE length(name.first) = 9 AND gender = 'female'";
query = c4query_new2(db, kC4N1QLQuery, c4str(queryStr), nullptr, ERROR_INFO(err));
compileSelect("SELECT META().id FROM _ WHERE length(name.first) = 9 AND gender = 'female'", kC4N1QLQuery);
REQUIRE(query);
checkExplanation(withIndex);
CHECK(run() == (vector<string>{"0000099"}));
CHECK(run() == result);

if ( withIndex ) {
c4query_release(query);
// Logically equivalent query, changing gender = 'female' to gender != 'male'.
// Because the condition is not exact as the condition in partial index, it would not use
// the index.
const char* queryStr2 = "SELECT META().id FROM _ WHERE length(name.first) = 9 AND gender != 'male'";
query = c4query_new2(db, kC4N1QLQuery, c4str(queryStr2), nullptr, ERROR_INFO(err));
compileSelect("SELECT META().id FROM _ WHERE length(name.first) = 9 AND gender != 'male'", kC4N1QLQuery);
REQUIRE(query);
checkExplanation(!withIndex);
CHECK(run() == (vector<string>{"0000099"}));
CHECK(run() == result);
}
}
}
Expand Down Expand Up @@ -447,9 +444,18 @@ N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query dict literal", "[Query][C]") {
N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query FTS", "[Query][C][FTS]") {
C4Error err;
auto defaultColl = getCollection(db, kC4DefaultCollectionSpec);
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("[[\".contact.address.street\"]]"), kC4JSONQuery,
kC4FullTextIndex, nullptr, WITH_ERROR(&err)));
compile(json5("['MATCH()', 'byStreet', 'Hwy']"));

bool useJSON = GENERATE(true, false);
if ( useJSON ) {
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("[[\".contact.address.street\"]]"),
kC4JSONQuery, kC4FullTextIndex, nullptr, WITH_ERROR(&err)));
compile(json5("['MATCH()', 'byStreet', 'Hwy']"));
} else {
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("contact.address.street"), kC4N1QLQuery,
kC4FullTextIndex, nullptr, WITH_ERROR(&err)));
compileSelect("SELECT META().id FROM _ WHERE MATCH(byStreet, 'Hwy')", kC4N1QLQuery);
}

auto results = runFTS();
CHECK(results
== (vector<vector<C4FullTextMatch>>{{{13, 0, 0, 10, 3}},
Expand All @@ -464,6 +470,41 @@ N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query FTS", "[Query][C][FTS]") {
c4slice_free(matched);
}

N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query FTS (Partial)", "[Query][C][FTS]") {
C4Error err;
auto defaultColl = getCollection(db, kC4DefaultCollectionSpec);

// c.f. above test, "C4Query FTS". The query is the same.
// W/o "where" in options, there are five contacts whose street addresses contain "Hwy".

C4IndexOptions options{};
// With the following "where" option, the query picks 2 Californians.
options.where = "contact.address.state = 'CA'";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("contact.address.street"), kC4N1QLQuery,
kC4FullTextIndex, &options, WITH_ERROR(&err)));

compileSelect("SELECT META().id FROM _ WHERE MATCH(byStreet, 'Hwy')", kC4N1QLQuery);
auto results = runFTS();
CHECK(results == (vector<vector<C4FullTextMatch>>{{{15, 0, 0, 11, 3}}, {{43, 0, 0, 12, 3}}}));

{
// Check we can get the where clause back via c4index_getOptions
auto index = REQUIRED(c4coll_getIndex(defaultColl, C4STR("byStreet"), nullptr));
C4IndexOptions outOptions;
REQUIRE(c4index_getOptions(index, &outOptions));
CHECK(string(outOptions.where) == options.where);
c4index_release(index);
}

// By creating the index with different options.where, the original index will be deleted,
// and the index table will be based soly on the new where clause. We get one Texan.
options.where = "contact.address.state = 'TX'";
REQUIRE(c4coll_createIndex(defaultColl, C4STR("byStreet"), C4STR("contact.address.street"), kC4N1QLQuery,
kC4FullTextIndex, &options, WITH_ERROR(&err)));
results = runFTS();
CHECK(results == (vector<vector<C4FullTextMatch>>{{{44, 0, 0, 12, 3}}}));
}

N_WAY_TEST_CASE_METHOD(C4QueryTest, "C4Query FTS multiple properties", "[Query][C][FTS]") {
C4Error err;
REQUIRE(c4db_createIndex(
Expand Down
4 changes: 2 additions & 2 deletions C/tests/c4QueryTest.hh
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ class C4QueryTest : public C4Test {

~C4QueryTest() { c4query_release(query); }

void compileSelect(const std::string& queryStr) {
void compileSelect(const std::string& queryStr, C4QueryLanguage language = kC4JSONQuery) {
INFO("Query = " << queryStr);
C4Error error{};
c4query_release(query);
query = c4query_new2(db, kC4JSONQuery, c4str(queryStr.c_str()), nullptr, ERROR_INFO(error));
query = c4query_new2(db, language, c4str(queryStr.c_str()), nullptr, ERROR_INFO(error));
REQUIRE(query);
}

Expand Down
18 changes: 11 additions & 7 deletions LiteCore/Query/IndexSpec.hh
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ namespace litecore {
IndexSpec(std::string name_, Type type_, string_view expression_, string_view whereClause_ = {},
QueryLanguage queryLanguage = QueryLanguage::kJSON, Options options_ = {})
: IndexSpec(name_, type_, alloc_slice(expression_), queryLanguage, options_) {
const_cast<alloc_slice&>(this->whereClause) = whereClause_;
setWhereClause(whereClause_);
}

IndexSpec(const IndexSpec&) = delete;
Expand All @@ -112,15 +112,19 @@ namespace litecore {
/** The optional WHERE clause: the condition for a partial index */
const fleece::impl::Array* where() const;

void setWhereClause(string_view whereClause_) const {
if ( !whereClause_.empty() ) whereClause = alloc_slice::nullPaddedString(whereClause_);
}

/** The nested unnestPath from arrayOptions, as separated by "[]." is turned to an array. */
const fleece::impl::Array* unnestPaths() const;

std::string const name; ///< Name of index
Type const type; ///< Type of index
alloc_slice const expression; ///< The query expression
alloc_slice const whereClause; ///< The where clause. If given, expression should be the what clause
QueryLanguage queryLanguage; ///< Is expression JSON or N1QL?
Options const options; ///< Options for FTS and vector indexes
std::string const name; ///< Name of index
Type const type; ///< Type of index
alloc_slice const expression; ///< The query expression
mutable alloc_slice whereClause; ///< The where clause. If given, expression should be the what clause
QueryLanguage queryLanguage; ///< Is expression JSON or N1QL?
Options const options; ///< Options for FTS and vector indexes

private:
fleece::impl::Doc* doc() const;
Expand Down
29 changes: 25 additions & 4 deletions LiteCore/Query/SQLiteDataFile+Indexes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace litecore {
"type INTEGER NOT NULL, " // C4IndexType
"keyStore TEXT NOT NULL, " // Name of the KeyStore it indexes
"expression TEXT, " // Indexed property expression (JSON or N1QL)
"whereClause TEXT, " // Property whereClause for partial index (JSON or N1QL)
"indexTableName TEXT, " // Index's SQLite name
"lastSeq TEXT)"); // indexed sequences, for lazy indexes, else null
ensureSchemaVersionAtLeast(SchemaVersion::WithIndexTable);
Expand All @@ -64,18 +65,23 @@ namespace litecore {

void SQLiteDataFile::registerIndex(const litecore::IndexSpec& spec, const string& keyStoreName,
const string& indexTableName) {
SQLite::Statement stmt(*this, "INSERT INTO indexes (name, type, keyStore, expression, indexTableName) "
"VALUES (?, ?, ?, ?, ?)");
SQLite::Statement stmt(*this,
"INSERT INTO indexes (name, type, keyStore, expression, indexTableName, whereClause) "
"VALUES (?, ?, ?, ?, ?, ?)");
// CBL-6000 adding prefix to distinguish between JSON and N1QL expression
string prefixedExpression{spec.queryLanguage == QueryLanguage::kJSON ? "=j"
: spec.queryLanguage == QueryLanguage::kN1QL ? "=n"
: ""};
// whereClause must be in the same queryLanguage as expression.

prefixedExpression += spec.expression.asString();
stmt.bindNoCopy(1, spec.name);
stmt.bind(2, spec.type);
stmt.bindNoCopy(3, keyStoreName);
stmt.bindNoCopy(4, prefixedExpression.c_str(), (int)prefixedExpression.length());
if ( spec.type != IndexSpec::kValue ) stmt.bindNoCopy(5, indexTableName);
if ( !spec.whereClause.empty() ) stmt.bindNoCopy(6, (char*)spec.whereClause.buf, (int)spec.whereClause.size);

LogStatement(stmt);
stmt.exec();
}
Expand All @@ -99,6 +105,20 @@ namespace litecore {
case IndexSpec::kFullText:
case IndexSpec::kVector:
same = schemaExistsWithSQL(indexTableName, "table", indexTableName, indexSQL);
if ( spec.type == IndexSpec::kFullText ) {
if ( same ) {
auto whatA = spec.what(), whatB = existingSpec->what();
if ( !whatA ? whatB != nullptr : !whatB ) same = false;
else if ( whatA )
same = whatA->toJSONString() == whatB->toJSONString();
}
if ( same ) {
auto whereA = spec.where(), whereB = existingSpec->where();
if ( !whereA ? whereB != nullptr : !whereB ) same = false;
else if ( whereA )
same = whereA->toJSONString() == whereB->toJSONString();
}
}
break;
case IndexSpec::kArray:
same = schemaExistsWithSQL(spec.name, "index", hexName(indexTableName), indexSQL);
Expand Down Expand Up @@ -194,7 +214,7 @@ namespace litecore {

vector<SQLiteIndexSpec> SQLiteDataFile::getIndexes(const KeyStore* store) const {
if ( indexTableExists() ) {
string sql = "SELECT name, type, expression, keyStore, indexTableName, lastSeq "
string sql = "SELECT name, type, expression, keyStore, indexTableName, lastSeq, whereClause "
"FROM indexes ORDER BY name";
if ( _schemaVersion < SchemaVersion::WithIndexesLastSeq ) {
// If schema doesn't have the `lastSeq` column, don't query it:
Expand Down Expand Up @@ -251,7 +271,7 @@ namespace litecore {
// Gets info of a single index. (Subroutine of create/deleteIndex.)
optional<SQLiteIndexSpec> SQLiteDataFile::getIndex(slice name) {
if ( !indexTableExists() ) return nullopt;
string sql = "SELECT name, type, expression, keyStore, indexTableName, lastSeq "
string sql = "SELECT name, type, expression, keyStore, indexTableName, lastSeq, whereClause "
"FROM indexes WHERE name=?";
if ( _schemaVersion < SchemaVersion::WithIndexesLastSeq ) {
// If schema doesn't have the `lastSeq` column, don't query it:
Expand Down Expand Up @@ -320,6 +340,7 @@ namespace litecore {

SQLiteIndexSpec spec{name, type, expression, queryLanguage, options, keyStoreName, indexTableName};
if ( auto col5 = stmt.getColumn(5); col5.isText() ) spec.indexedSequences = col5.getText();
if ( auto col6 = stmt.getColumn(6); col6.isText() ) spec.setWhereClause({col6.getText()});
return spec;
}

Expand Down
10 changes: 10 additions & 0 deletions LiteCore/Storage/SQLiteDataFile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,16 @@ namespace litecore {
}
}
});

(void)upgradeSchema(SchemaVersion::WithIndexesWhereColumn, "Adding indexes.whereClause column", [&] {
string sql;
if ( getSchema("indexes", "table", "indexes", sql) ) {
// Check if the table needs to be updated to add the 'lastSeq' column: (v3.2)
if ( sql.find("whereClause") == string::npos ) {
_exec("ALTER TABLE indexes ADD COLUMN whereClause TEXT");
}
}
});
});

// Configure number of extra threads to be used by SQLite:
Expand Down
11 changes: 6 additions & 5 deletions LiteCore/Storage/SQLiteDataFile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,13 @@ namespace litecore {

WithNewDocs = 400, // New document/revision storage (CBL 3.0)

WithDeletedTable = 500, // Added 'deleted' KeyStore for deleted docs (CBL 3.0?)
WithIndexesLastSeq = 501, // Added 'lastSeq' column to 'indexes' table (CBL 3.2)
WithExpirationColumn = 502, // Added 'expiration' column to KeyStore
MaxReadable = 599, // Cannot open versions newer than this
WithDeletedTable = 500, // Added 'deleted' KeyStore for deleted docs (CBL 3.0?)
WithIndexesLastSeq = 501, // Added 'lastSeq' column to 'indexes' table (CBL 3.2)
WithExpirationColumn = 502, // Added 'expiration' column to KeyStore
WithIndexesWhereColumn = 503, // Added 'whereClause' column to the "indexes" table
MaxReadable = 599, // Cannot open versions newer than this

Current = WithExpirationColumn
Current = WithIndexesWhereColumn
};

void reopenSQLiteHandle();
Expand Down

0 comments on commit a083d7e

Please sign in to comment.