From fb01661ec0110926dd412bee5a3c5184a84f68b9 Mon Sep 17 00:00:00 2001
From: jianminzhao <76990468+jianminzhao@users.noreply.github.com>
Date: Mon, 29 Jul 2024 16:43:14 -0700
Subject: [PATCH 1/5] CBL-6099: Test "Rapid Restarts" failing frequently on Linux (#2111)

After the replicator is stopped, it may still take some time to wind down its
objects. Our test currently allows 2 seconds for this, which turns out not to
be enough in situations where there are errors like the following:

Sync ERROR Obj=/Repl#21/revfinder#26/ Got LiteCore error: LiteCore NotOpen, "database not open"

This error is artificial: it occurs because we close the database as soon as
the replicator goes to Stopped, as opposed to when the replicator is deleted.
The error follows the exception to the top frame, and that takes substantial
time when there are many of them.

I increased the 2-second allowance to 20 seconds. We won't always wait the
full 20 seconds, because the waiter polls the condition every 50 ms; only an
actual memory leak will wait the whole 20 seconds before failing.
---
 C/tests/c4Test.cc                     | 2 +-
 LiteCore/tests/LiteCoreTest.cc        | 2 +-
 Replicator/tests/ReplicatorAPITest.cc | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/C/tests/c4Test.cc b/C/tests/c4Test.cc
index 7e35706c5..c2de695f9 100644
--- a/C/tests/c4Test.cc
+++ b/C/tests/c4Test.cc
@@ -223,7 +223,7 @@ C4Test::~C4Test() {

     if ( !current_exception() ) {
         // Check for leaks:
-        if ( !WaitUntil(2000ms, [&] { return c4_getObjectCount() - objectCount == 0; }) ) {
+        if ( !WaitUntil(20s, [&] { return c4_getObjectCount() - objectCount == 0; }) ) {
            FAIL_CHECK("LiteCore objects were leaked by this test:");
            fprintf(stderr, "*** LEAKED LITECORE OBJECTS: \n");
            c4_dumpInstances();
diff --git a/LiteCore/tests/LiteCoreTest.cc b/LiteCore/tests/LiteCoreTest.cc
index d13ecd14a..4e0b2f45a 100644
--- a/LiteCore/tests/LiteCoreTest.cc
+++ b/LiteCore/tests/LiteCoreTest.cc
@@ -110,7 +110,7 @@ TestFixture::TestFixture() : _warningsAlreadyLogged(sWarningsLogged), _objectCou
 TestFixture::~TestFixture() {
     if ( !current_exception() ) {
         // Check for leaks:
-        if ( !WaitUntil(2000ms, [&] { return c4_getObjectCount() - _objectCount == 0; }) ) {
+        if ( !WaitUntil(20s, [&] { return c4_getObjectCount() - _objectCount == 0; }) ) {
            FAIL_CHECK("LiteCore objects were leaked by this test:");
            fprintf(stderr, "*** LEAKED LITECORE OBJECTS: \n");
            c4_dumpInstances();
diff --git a/Replicator/tests/ReplicatorAPITest.cc b/Replicator/tests/ReplicatorAPITest.cc
index 9c6912e78..f562f6945 100644
--- a/Replicator/tests/ReplicatorAPITest.cc
+++ b/Replicator/tests/ReplicatorAPITest.cc
@@ -696,6 +696,8 @@ TEST_CASE_METHOD(ReplicatorAPITest, "Rapid Restarts", "[C][Push][Pull]") {
     C4Error err;
     REQUIRE(startReplicator(kC4Continuous, kC4Continuous, WITH_ERROR(&err)));
     waitForStatus(kC4Busy, 5s);
+    // Give the replicator a little time in the busy state:
+    std::this_thread::sleep_for(50ms);

     C4ReplicatorActivityLevel expected = kC4Stopped;
     SECTION("Stop / Start") {

From 0e613ae41e55c5f6c149469f5522fd00fec83396 Mon Sep 17 00:00:00 2001
From: jianminzhao <76990468+jianminzhao@users.noreply.github.com>
Date: Tue, 30 Jul 2024 08:55:35 -0700
Subject: [PATCH 2/5] CBL-6104: Flaky test, "Multiple Collections Incremental Revisions" (#2112)

The test occasionally fails because we assumed that successive revisions all
get replicated to the destinations, whereas the replicator may skip an
obsolete revision, say rev-2, if rev-3 already exists by the time the pusher
looks for revisions to push. In our test case, we create successive revisions
at intervals of 500 milliseconds.
Most of the time, 500 ms is enough to keep the revisions apart when the pusher
picks them up. On the Jenkins machine, the log shows that the pusher found
obsolete revisions when the test failed.

We modified the test's success criteria: we now only check that the latest
revisions are replicated to the destination. This is the designed behavior.
---
 Replicator/tests/ReplicatorCollectionTest.cc | 88 ++++++++------------
 1 file changed, 33 insertions(+), 55 deletions(-)

diff --git a/Replicator/tests/ReplicatorCollectionTest.cc b/Replicator/tests/ReplicatorCollectionTest.cc
index 6d4f5c8d8..28c77fc20 100644
--- a/Replicator/tests/ReplicatorCollectionTest.cc
+++ b/Replicator/tests/ReplicatorCollectionTest.cc
@@ -496,6 +496,9 @@ TEST_CASE_METHOD(ReplicatorCollectionTest, "Multiple Collections Incremental Rev
     C4Collection* roses2 = getCollection(db2, Roses);
     C4Collection* tulips2 = getCollection(db2, Tulips);
     Jthread jthread;
+    _expectedDocumentCount = -1;
+    std::vector<std::pair<C4Collection*, slice>> docsWithIncrementalRevisions = {{roses2, "roses-docko"_sl},
+                                                                                 {tulips2, "tulips-docko"_sl}};

     SECTION("PUSH") {
         _callbackWhenIdle = [=, &jthread]() {
@@ -511,14 +514,8 @@ TEST_CASE_METHOD(ReplicatorCollectionTest, "Multiple Collections Incremental Rev
             _callbackWhenIdle = nullptr;
         };

-        // 4 docs plus 6 revs
-        _expectedDocumentCount = 10;
         runPushReplication({Roses, Tulips}, {Tulips, Lavenders, Roses}, kC4Continuous);
-
-        CHECK(c4coll_getDocumentCount(roses2) == 3);
-        CHECK(c4coll_getDocumentCount(tulips2) == 3);
     }
-
     SECTION("PULL") {
         _callbackWhenIdle = [=, &jthread]() {
             jthread.thread = std::thread(std::thread{[=]() {
@@ -533,65 +530,46 @@ TEST_CASE_METHOD(ReplicatorCollectionTest, "Multiple Collections Incremental Rev
             _callbackWhenIdle = nullptr;
         };

-        _expectedDocumentCount = 10;
         runPullReplication({Tulips, Lavenders, Roses}, {Roses, Tulips}, kC4Continuous);
-        CHECK(c4coll_getDocumentCount(roses2) == 3);
-        CHECK(c4coll_getDocumentCount(tulips2) == 3);
     }
-
     SECTION("PUSH and PULL") {
         addDocs(db2, Roses, 2, "db2-Roses-");
         addDocs(db2, Tulips, 2, "db2-Tulips-");
+        docsWithIncrementalRevisions.emplace_back(roses, "roses2-docko"_sl);
+        docsWithIncrementalRevisions.emplace_back(tulips, "tulips2-docko"_sl);

-        std::mutex mutex;
-        int stopped{0};
-        std::condition_variable cv;
-
-        _callbackWhenIdle = [=, &jthread, &mutex, &cv, &stopped]() {
-            if ( jthread.thread.get_id() == std::thread::id() ) {
-                jthread.thread = thread(std::thread{[=, &mutex, &cv, &stopped]() {
-                    addRevs(roses, 500ms, alloc_slice("roses-docko"), 1, 3, true, "db-roses");
-                    addRevs(tulips, 500ms, alloc_slice("tulips-docko"), 1, 3, true, "db-tulips");
-                    addRevs(roses2, 500ms, alloc_slice("roses2-docko"), 1, 3, true, "db2-roses");
-                    addRevs(tulips2, 500ms, alloc_slice("tulips2-docko"), 1, 3, true, "db2-tulips");
-                    std::unique_lock lk(mutex);
-                    if ( !cv.wait_for(lk, 10s, [&stopped]() { return stopped == 1; }) ) {
-                        // timed out. Stop the replicator to avoid hanging.
-                        stopped = 2;
-                        _replClient->stop();
-                    }
-                }});
-            }
-            if ( _statusReceived.progress.documentCount < _expectedDocumentCount ) { return; }
-            // It seems that windows will delete the lambda object after
-            // _callbackWhenIdle = nullptr. Therefore, capture it beforehand.
-            auto self = this;
-            self->_callbackWhenIdle = nullptr;
-            self->_stopOnIdle = true;
-            self->_checkStopWhenIdle();
+        _callbackWhenIdle = [=, &jthread]() {
+            jthread.thread = thread(std::thread{[=]() {
+                // When it turns Idle for the first time, we assume 2 documents from db have been pushed to db2,
+                // and 2 documents from db2 have been pulled to db.
+                CHECK(c4coll_getDocumentCount(roses) == 4);
+                CHECK(c4coll_getDocumentCount(tulips) == 4);
+                CHECK(c4coll_getDocumentCount(roses2) == 4);
+                CHECK(c4coll_getDocumentCount(tulips2) == 4);
+
+                // We now add 3 revisions to the respective docs in db and db2. They are supposed to be
+                // pushed and pulled to db2 and db, respectively.
+                // Within 5 seconds, we assume that the latest revision, 3, will be replicated to the
+                // destinations.
+                addRevs(roses, 500ms, alloc_slice("roses-docko"), 1, 3, true, "db-roses");
+                addRevs(tulips, 500ms, alloc_slice("tulips-docko"), 1, 3, true, "db-tulips");
+                addRevs(roses2, 500ms, alloc_slice("roses2-docko"), 1, 3, true, "db2-roses");
+                addRevs(tulips2, 500ms, alloc_slice("tulips2-docko"), 1, 3, true, "db2-tulips");
+                sleepFor(5s);
+                stopWhenIdle();
+            }});
+            _callbackWhenIdle = nullptr;
         };
-        // 3 revs from roses to roses2, 3 from roses2 to roses, total 6
-        // 3 revs from tulips to tulips2, 3 from tulips2 to tulips, total 6
-        // 4 docs for push, 4docs for pull, total 8
-        _expectedDocumentCount = -1;  // disable checking document count in runReplicators.
-        unsigned expectedDocumentCount = 20;
         runPushPullReplication({Roses, Tulips}, {Tulips, Lavenders, Roses}, kC4Continuous);
-        {
-            std::lock_guard lock(mutex);
-            if ( stopped == 0 ) {
-                stopped = 1;
-                cv.notify_all();
-            } else if ( stopped == 2 ) {
-                UNSCOPED_INFO("The replicator is forced to stop after timeout");
-            }
-        }
+    }

-        CHECK(_statusReceived.progress.documentCount == expectedDocumentCount);
-        CHECK(c4coll_getDocumentCount(roses) == 6);
-        CHECK(c4coll_getDocumentCount(tulips) == 6);
-        CHECK(c4coll_getDocumentCount(roses2) == 6);
-        CHECK(c4coll_getDocumentCount(tulips2) == 6);
+    // Check that the docs with incremental revisions got the latest revision, 3, across.
+    for ( const auto& coll_doc : docsWithIncrementalRevisions ) {
+        c4::ref<C4Document> doc = c4coll_getDoc(coll_doc.first, coll_doc.second, true, kDocGetMetadata, ERROR_INFO());
+        CHECK(doc);
+        alloc_slice hist = c4doc_getRevisionHistory(doc, 1, nullptr, 0);
+        if ( isRevTrees() ) CHECK(3 == c4rev_getGeneration(hist));
    }
 }

From 2bdccb1637772b165bd0919f4c17d1bd7dc07eaf Mon Sep 17 00:00:00 2001
From: jianminzhao <76990468+jianminzhao@users.noreply.github.com>
Date: Tue, 30 Jul 2024 08:56:04 -0700
Subject: [PATCH 3/5] CBL-6100: Flaky test "REST root level" (#2110)

Allow several timeouts for the HTTP request. For now, we allow 4 timeouts,
with each timeout being 5 seconds.
---
 REST/tests/RESTListenerTest.cc | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/REST/tests/RESTListenerTest.cc b/REST/tests/RESTListenerTest.cc
index 47075aa4a..bc277eacb 100644
--- a/REST/tests/RESTListenerTest.cc
+++ b/REST/tests/RESTListenerTest.cc
@@ -21,6 +21,7 @@
 #include "NetworkInterfaces.hh"
 #include "fleece/Mutable.hh"
 #include "ReplicatorAPITest.hh"
+#include "WebSocketInterface.hh"
 #include
 #include

@@ -107,14 +108,32 @@ class C4RESTTest
         C4Log("---- %s %s", method.c_str(), uri.c_str());
         string scheme = config.tlsConfig ? "https" : "http";
"https" : "http"; auto port = c4listener_getPort(listener()); - unique_ptr r(new Response(scheme, method, requestHostname, port, uri)); - r->setHeaders(headers).setBody(body); - if ( pinnedCert ) r->allowOnlyCert(pinnedCert); + unique_ptr r; + unsigned totalAttempt = 5; + double timeout = 5; + for ( unsigned attemptCount = 0; attemptCount < totalAttempt; ++attemptCount ) { + r.reset(new Response(scheme, method, requestHostname, port, uri)); + r->setHeaders(headers).setBody(body); + if ( pinnedCert ) r->allowOnlyCert(pinnedCert); # ifdef COUCHBASE_ENTERPRISE - if ( rootCerts ) r->setRootCerts(rootCerts); - if ( clientIdentity.cert ) r->setIdentity(clientIdentity.cert, clientIdentity.key); + if ( rootCerts ) r->setRootCerts(rootCerts); + if ( clientIdentity.cert ) r->setIdentity(clientIdentity.cert, clientIdentity.key); # endif - if ( !r->run() ) C4LogToAt(kC4DefaultLog, kC4LogWarning, "Error: %s", c4error_descriptionStr(r->error())); + r->setTimeout(timeout); + if ( !r->run() ) { + if ( r->error().code != websocket::kNetErrTimeout || attemptCount + 1 == totalAttempt ) { + if ( attemptCount > 0 ) + C4LogToAt(kC4DefaultLog, kC4LogWarning, "Error: %s after %d timeouts", + c4error_descriptionStr(r->error()), attemptCount); + else + C4LogToAt(kC4DefaultLog, kC4LogWarning, "Error: %s", c4error_descriptionStr(r->error())); + } + } else { + if ( attemptCount > 0 ) + C4LogToAt(kC4DefaultLog, kC4LogInfo, "Request has been timed out %d times", attemptCount); + break; + } + } C4Log("Status: %d %s", static_cast(r->status()), r->statusMessage().c_str()); string responseBody = r->body().asString(); C4Log("Body: %s", responseBody.c_str()); From 0b30e0c60b89efb879109194e4281012bffbfa98 Mon Sep 17 00:00:00 2001 From: jianminzhao <76990468+jianminzhao@users.noreply.github.com> Date: Tue, 30 Jul 2024 16:03:59 -0700 Subject: [PATCH 4/5] CBL-6120: Provide an option to enable full sync in the database (#2113) Added a flag to C4DatabaseFlags, kC4DB_DiskSyncFull. This flag is passed to DataFile::Options, which is used as we request a connection to the SQLite database. --- C/include/c4DatabaseTypes.h | 15 ++++++++------- LiteCore/Database/DatabaseImpl.cc | 1 + LiteCore/Storage/DataFile.cc | 8 +++----- LiteCore/Storage/DataFile.hh | 3 +++ LiteCore/Storage/KeyStore.hh | 4 +++- LiteCore/Storage/SQLiteDataFile.cc | 5 +++-- LiteCore/tests/c4BaseTest.cc | 25 +++++++++++++++++++++++++ 7 files changed, 46 insertions(+), 15 deletions(-) diff --git a/C/include/c4DatabaseTypes.h b/C/include/c4DatabaseTypes.h index 325f76971..0faafc129 100644 --- a/C/include/c4DatabaseTypes.h +++ b/C/include/c4DatabaseTypes.h @@ -31,13 +31,14 @@ C4API_BEGIN_DECLS /** Boolean options for C4DatabaseConfig. 
 typedef C4_OPTIONS(uint32_t, C4DatabaseFlags){
-    kC4DB_Create = 0x01,          ///< Create the file if it doesn't exist
-    kC4DB_ReadOnly = 0x02,        ///< Open file read-only
-    kC4DB_AutoCompact = 0x04,     ///< Enable auto-compaction [UNIMPLEMENTED]
-    kC4DB_VersionVectors = 0x08,  ///< Upgrade DB to version vectors instead of rev trees
-    kC4DB_NoUpgrade = 0x20,       ///< Disable upgrading an older-version database
-    kC4DB_NonObservable = 0x40,   ///< Disable database/collection observers, for slightly faster writes
-    kC4DB_FakeVectorClock = 0x80, ///< Use counters instead of timestamps in version vectors (TESTS ONLY)
+    kC4DB_Create = 0x01,            ///< Create the file if it doesn't exist
+    kC4DB_ReadOnly = 0x02,          ///< Open file read-only
+    kC4DB_AutoCompact = 0x04,       ///< Enable auto-compaction [UNIMPLEMENTED]
+    kC4DB_VersionVectors = 0x08,    ///< Upgrade DB to version vectors instead of rev trees
+    kC4DB_NoUpgrade = 0x20,         ///< Disable upgrading an older-version database
+    kC4DB_NonObservable = 0x40,     ///< Disable database/collection observers, for slightly faster writes
+    kC4DB_DiskSyncFull = 0x80,      ///< Flush to disk after each transaction
+    kC4DB_FakeVectorClock = 0x0100, ///< Use counters instead of timestamps in version vectors (TESTS ONLY)
 };

diff --git a/LiteCore/Database/DatabaseImpl.cc b/LiteCore/Database/DatabaseImpl.cc
index a51780290..9799fd756 100644
--- a/LiteCore/Database/DatabaseImpl.cc
+++ b/LiteCore/Database/DatabaseImpl.cc
@@ -132,6 +132,7 @@ namespace litecore {
         options.create = (_config.flags & kC4DB_Create) != 0;
         options.writeable = (_config.flags & kC4DB_ReadOnly) == 0;
         options.upgradeable = (_config.flags & kC4DB_NoUpgrade) == 0;
+        options.diskSyncFull = (_config.flags & kC4DB_DiskSyncFull) != 0;
         options.useDocumentKeys = true;
         options.encryptionAlgorithm = (EncryptionAlgorithm)_config.encryptionKey.algorithm;
         if ( options.encryptionAlgorithm != kNoEncryption ) {
diff --git a/LiteCore/Storage/DataFile.cc b/LiteCore/Storage/DataFile.cc
index 7efa69d16..6610be407 100644
--- a/LiteCore/Storage/DataFile.cc
+++ b/LiteCore/Storage/DataFile.cc
@@ -108,11 +108,9 @@

     const DataFile::Options DataFile::Options::defaults = {
-            {true},  // sequences
-            true,
-            true,
-            true,
-            true  // create, writeable, useDocumentKeys, upgradeable
+            {true},                  // sequences
+            true, true, true, true,  // create, writeable, useDocumentKeys, upgradeable
+            false                    // diskSyncFull
     };

     DataFile::DataFile(const FilePath& path, Delegate* delegate, const DataFile::Options* options)
diff --git a/LiteCore/Storage/DataFile.hh b/LiteCore/Storage/DataFile.hh
index d904bdb16..2856c7767 100644
--- a/LiteCore/Storage/DataFile.hh
+++ b/LiteCore/Storage/DataFile.hh
@@ -75,6 +75,7 @@ namespace litecore {
             bool writeable : 1;        ///< If false, db is opened read-only
             bool useDocumentKeys : 1;  ///< Use SharedKeys for Fleece docs
             bool upgradeable : 1;      ///< DB schema can be upgraded
+            bool diskSyncFull : 1;     ///< SQLite PRAGMA synchronous
             EncryptionAlgorithm encryptionAlgorithm;  ///< What encryption (if any)
             alloc_slice encryptionKey;                ///< Encryption key, if encrypting
             DatabaseTag dbTag;
@@ -267,6 +268,8 @@

         void setOptions(const Options& o) { _options = o; }

+        const Options& getOptions() const { return _options; }
+
         void forOpenKeyStores(function_ref fn);

         virtual Factory& factory() const = 0;
diff --git a/LiteCore/Storage/KeyStore.hh b/LiteCore/Storage/KeyStore.hh
index c5c32b9d5..c07bdf6e6 100644
--- a/LiteCore/Storage/KeyStore.hh
+++ b/LiteCore/Storage/KeyStore.hh
@@ -11,7 +11,9 @@
 //

 #pragma once
-#define LITECORE_CPP_API 1
+#ifndef LITECORE_CPP_API
+#    define LITECORE_CPP_API 1
+#endif
 #include "IndexSpec.hh"
 #include "RecordEnumerator.hh"
 #include

diff --git a/LiteCore/Storage/SQLiteDataFile.cc b/LiteCore/Storage/SQLiteDataFile.cc
index 0f4275e84..81ef76bfe 100644
--- a/LiteCore/Storage/SQLiteDataFile.cc
+++ b/LiteCore/Storage/SQLiteDataFile.cc
@@ -301,11 +301,12 @@

         _exec(format("PRAGMA cache_size=%d; "             // Memory cache
                      "PRAGMA mmap_size=%d; "              // Memory-mapped reads
-                     "PRAGMA synchronous=normal; "        // Speeds up commits
+                     "PRAGMA synchronous=%s; "            // Speeds up commits
                      "PRAGMA journal_size_limit=%lld; "   // Limit WAL disk usage
                      "PRAGMA case_sensitive_like=true; "  // Case sensitive LIKE, for N1QL compat
                      "PRAGMA fullfsync=ON",               // Attempt to mitigate damage due to sudden loss of power (iOS / macOS)
-                     -(int)kCacheSize / 1024, kMMapSize, (long long)kJournalSize));
+                     -(int)kCacheSize / 1024, kMMapSize, getOptions().diskSyncFull ? "full" : "normal",
+                     (long long)kJournalSize));

         (void)upgradeSchema(SchemaVersion::WithPurgeCount, "Adding purgeCnt column", [&] {
             // Schema upgrade: Add the `purgeCnt` column to the kvmeta table.
diff --git a/LiteCore/tests/c4BaseTest.cc b/LiteCore/tests/c4BaseTest.cc
index 545d1ad69..3b26caa3d 100644
--- a/LiteCore/tests/c4BaseTest.cc
+++ b/LiteCore/tests/c4BaseTest.cc
@@ -15,6 +15,7 @@
 #include "c4ExceptionUtils.hh"
 #include "fleece/InstanceCounted.hh"
 #include "catch.hpp"
+#include "DatabaseImpl.hh"
 #include "NumConversion.hh"
 #include "Actor.hh"
 #include "URLTransformer.hh"
@@ -25,6 +26,7 @@
 #    include "Error.hh"
 #    include
 #endif
+#include <sstream>

 using namespace fleece;
 using namespace std;
@@ -138,6 +140,29 @@ TEST_CASE("C4Error Reporting Macros", "[Errors][C]") {
 #endif
 }

+TEST_CASE_METHOD(C4Test, "Database Flag FullSync", "[Database][C]") {
+    // Ensure that, by default, diskSyncFull is false.
+    CHECK(!litecore::asInternal(db)->dataFile()->options().diskSyncFull);
+
+    C4DatabaseConfig2 config = *c4db_getConfig2(db);
+    config.flags |= kC4DB_DiskSyncFull;
+
+    std::stringstream ss;
+    ss << std::string(c4db_getName(db)) << "_" << c4_now();
+    c4::ref<C4Database> dbWithFullSync = c4db_openNamed(slice(ss.str().c_str()), &config, ERROR_INFO());
+    // The flag in config is passed to DataFile options.
+    CHECK(litecore::asInternal(dbWithFullSync)->dataFile()->options().diskSyncFull);
+
+    config.flags &= ~kC4DB_DiskSyncFull;
+    c4::ref<C4Database> otherConnection = c4db_openNamed(c4db_getName(dbWithFullSync), &config, ERROR_INFO());
+    // The flag applies per connection opened with the config.
+    CHECK(!litecore::asInternal(otherConnection)->dataFile()->options().diskSyncFull);
+
+    c4::ref<C4Database> againConnection = c4db_openAgain(dbWithFullSync, ERROR_INFO());
+    // The flag is passed to the database opened by openAgain.
+    CHECK(litecore::asInternal(againConnection)->dataFile()->options().diskSyncFull);
+}
+
 #pragma mark - INSTANCECOUNTED:

 namespace {

From 163b832d936f795c2c2a3cf15990c03787616294 Mon Sep 17 00:00:00 2001
From: Pasin Suriyentrakorn
Date: Tue, 27 Aug 2024 22:23:06 -0700
Subject: [PATCH 5/5] CBL-6193 : Fix address conversion for request when using proxy (#2127)

* Fixed a crash when converting an address (Address) to a C4Address by using
  the cast operator instead of casting the pointer, which is no longer valid
  due to the change in private variables (see the first sketch below).

* When creating the address object, use the path, not the full URL, for the
  path component (see the second sketch, after the diff below).
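To illustrate the first point, here is a minimal standalone sketch of the pattern. It uses hypothetical Wrapper and CStruct types, not the real Address/C4Address declarations: once a C++ wrapper is no longer layout-compatible with the C struct it exposes, reinterpreting a pointer to the wrapper is undefined behavior, whereas a user-defined conversion operator keeps returning the correctly placed struct.

    #include <cstdio>
    #include <string>
    #include <utility>

    // Hypothetical C-level struct, standing in for C4Address.
    struct CStruct {
        const char* hostname;
        unsigned    port;
    };

    // Hypothetical C++ wrapper, standing in for the Address class.
    class Wrapper {
      public:
        Wrapper(std::string host, unsigned port) : _host(std::move(host)), _c{_host.c_str(), port} {}

        // Conversion operator: always yields a pointer to the embedded C struct,
        // wherever it happens to live inside the object.
        operator const CStruct*() const { return &_c; }

      private:
        std::string _host;  // private member added later; it shifts _c within the object
        CStruct     _c;
    };

    int main() {
        Wrapper address("db.example.com", 4984);

        // Old pattern: reinterpret the wrapper's address as a CStruct*. This only worked
        // while the struct happened to be the first (and only) member; after the layout
        // change it reads _host's bytes as a CStruct -- undefined behavior, hence the crash.
        // const CStruct* bad = (const CStruct*)&address;

        // Fixed pattern: cast the object itself, which invokes the conversion operator.
        const CStruct* good = (const CStruct*)address;
        std::printf("%s:%u\n", good->hostname, good->port);
        return 0;
    }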
---
 Networking/HTTP/HTTPLogic.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Networking/HTTP/HTTPLogic.cc b/Networking/HTTP/HTTPLogic.cc
index 09b9d8b47..ee4d29b93 100644
--- a/Networking/HTTP/HTTPLogic.cc
+++ b/Networking/HTTP/HTTPLogic.cc
@@ -94,8 +94,8 @@ namespace litecore::net {
             // the new type can be handled the same way.
             if ( _isWebSocket ) {
                 Address address = {_address.scheme() == "wss"_sl ? "https"_sl : "http"_sl, _address.hostname(),
-                                   _address.port(), _address.url()};
-                rq << string(Address::toURL(*(C4Address*)&address));
+                                   _address.port(), _address.path()};
+                rq << string(Address::toURL(*(C4Address*)address));
             } else {
                 rq << string(_address.url());
             }
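For the second point, a small sketch of the request-target rewrite, under stated assumptions: the makeTarget helper and the example host and path are hypothetical, and the real code rebuilds the URL via Address::toURL. The idea it shows is that feeding the full original URL in as the path component doubles the scheme/host/port in the proxied request line, whereas passing only the path yields a well-formed target.

    #include <cstdio>
    #include <string>

    // Hypothetical helper that rebuilds "scheme://host:port" plus a path component.
    static std::string makeTarget(const std::string& scheme, const std::string& host, unsigned port,
                                  const std::string& pathComponent) {
        return scheme + "://" + host + ":" + std::to_string(port) + pathComponent;
    }

    int main() {
        // Example WebSocket endpoint: ws://db.example.com:4984/travel
        const std::string fullURL = "ws://db.example.com:4984/travel";
        const std::string path    = "/travel";

        // Old behavior: the full URL was used as the path component -> doubled, malformed target.
        std::printf("old: GET %s\n", makeTarget("http", "db.example.com", 4984, fullURL).c_str());
        // prints: old: GET http://db.example.com:4984ws://db.example.com:4984/travel

        // Fixed behavior: only the path goes into the path component.
        std::printf("new: GET %s\n", makeTarget("http", "db.example.com", 4984, path).c_str());
        // prints: new: GET http://db.example.com:4984/travel
        return 0;
    }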