From 0f127066679e1adb0f85e5f9cb8ba0fdac0779cc Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Mon, 14 Oct 2024 15:06:02 -0600 Subject: [PATCH 01/31] DRIVERS-2884 Add connection churn spec tests --- .../tests/connection-churn.json | 663 ++++++++++++++++++ .../tests/connection-churn.yml | 401 +++++++++++ 2 files changed, 1064 insertions(+) create mode 100644 source/client-side-operations-timeout/tests/connection-churn.json create mode 100644 source/client-side-operations-timeout/tests/connection-churn.yml diff --git a/source/client-side-operations-timeout/tests/connection-churn.json b/source/client-side-operations-timeout/tests/connection-churn.json new file mode 100644 index 0000000000..eb5272674d --- /dev/null +++ b/source/client-side-operations-timeout/tests/connection-churn.json @@ -0,0 +1,663 @@ +{ + "description": "operation timeouts do not cause connection churn", + "schemaVersion": "1.9", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "failPointClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "test", + "documents": [] + } + ], + "tests": [ + { + "description": "write op with successful pending read", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1 + }, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "connectionClosedEvent" + ] + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 750 + } + } + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 500, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "findOne", + "object": "collection", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + } + "commandName": "insert" + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + }, + { + "description": "write op with unsuccessful pending read", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1 + }, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "connectionClosedEvent" + ] + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 2 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 750 + } + } + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionClosedEvent": { + "reason": "error" + } + } + ] + } + ] + }, + { + "description": "read op with successful pending read", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1 + }, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "connectionClosedEvent" + ] + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 750 + } + } + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 500, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "findOne", + "object": "collection", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + }, + { + "description": "write op with unsuccessful pending read", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1 + }, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "connectionClosedEvent" + ] + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 2 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 750 + } + } + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionClosedEvent": { + "reason": "error" + } + } + ] + } + ] + }, + { + "description": "read op with successful pending read", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1 + }, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "connectionClosedEvent" + ] + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 750 + } + } + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 500, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "findOne", + "object": "collection", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + }, + { + "description": "write op with unsuccessful pending read", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1 + }, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "connectionClosedEvent" + ] + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 2 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 750 + } + } + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionClosedEvent": { + "reason": "error" + } + } + ] + } + ] + } + ] +} diff --git a/source/client-side-operations-timeout/tests/connection-churn.yml b/source/client-side-operations-timeout/tests/connection-churn.yml new file mode 100644 index 0000000000..47da20e4d9 --- /dev/null +++ b/source/client-side-operations-timeout/tests/connection-churn.yml @@ -0,0 +1,401 @@ +description: "operation timeouts do not cause connection churn" + +schemaVersion: "1.9" + +runOnRequirements: + - minServerVersion: "4.4" + topologies: ["replicaset", "sharded"] + +createEntities: + - client: + id: &failPointClient failPointClient + useMultipleMongoses: false + +initialData: + - collectionName: &collectionName coll + databaseName: &databaseName test + documents: [] + +tests: + - description: "write op with successful pending read" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - connectionClosedEvent + - database: + id: &database database + client: *client + databaseName: *databaseName + - collection: + id: &collection collection + database: *database + collectionName: *collectionName + + # Create a failpoint to block first op + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 1 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 750 + + # Execute op with timeout < block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 500 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # Execute a subsequent operation to complete the read when checking out + # the single available connection. + - name: findOne + object: *collection + arguments: + filter: { _id: 1 } + + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - client: *client + eventType: cmap + events: [] # Expect no connection closure. + + - description: "write op with unsuccessful pending read" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - connectionClosedEvent + - database: + id: &database database + client: *client + databaseName: *databaseName + - collection: + id: &collection collection + database: *database + collectionName: *collectionName + + # Create a failpoint to block first op + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 2 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 750 + + # Execute op with timeout < block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # The pending read should fail. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - client: *client + eventType: cmap + events: + - connectionClosedEvent: + reason: error + + - description: "read op with successful pending read" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - connectionClosedEvent + - database: + id: &database database + client: *client + databaseName: *databaseName + - collection: + id: &collection collection + database: *database + collectionName: *collectionName + + # Create a failpoint to block first op + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 1 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 750 + + # Execute op with timeout < block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 500 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # Execute a subsequent operation to complete the read when checking out + # the single available connection. + - name: findOne + object: *collection + arguments: + filter: { _id: 1 } + + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - client: *client + eventType: cmap + events: [] # Expect no connection closure. + + - description: "write op with unsuccessful pending read" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + # For single-threaded drivers, ensure the operating connection + # is checked out to complete the read. + maxPoolSize: 1 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - connectionClosedEvent + - database: + id: &database database + client: *client + databaseName: *databaseName + - collection: + id: &collection collection + database: *database + collectionName: *collectionName + + # Create a failpoint to block first op + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 2 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 750 + + # Execute op with timeout < block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # The pending read should fail. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - client: *client + eventType: cmap + events: + - connectionClosedEvent: + reason: error + + - description: "read op with successful pending read" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - connectionClosedEvent + - database: + id: &database database + client: *client + databaseName: *databaseName + - collection: + id: &collection collection + database: *database + collectionName: *collectionName + + # Create a failpoint to block first op + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 1 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 750 + + # Execute op with timeout < block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 500 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # Execute a subsequent operation to complete the read when checking out + # the single available connection. + - name: findOne + object: *collection + arguments: + filter: { _id: 1 } + + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - client: *client + eventType: cmap + events: [] # Expect no connection closure. + + - description: "write op with unsuccessful pending read" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + # For single-threaded drivers, ensure the operating connection + # is checked out to complete the read. + maxPoolSize: 1 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - connectionClosedEvent + - database: + id: &database database + client: *client + databaseName: *databaseName + - collection: + id: &collection collection + database: *database + collectionName: *collectionName + + # Create a failpoint to block first op + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 2 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 750 + + # Execute op with timeout < block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # The pending read should fail. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - client: *client + eventType: cmap + events: + - connectionClosedEvent: + reason: error + From fe181207d66ca03b95b98a7b9a3fe9e5c60fae29 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Mon, 14 Oct 2024 15:10:40 -0600 Subject: [PATCH 02/31] DRIVERS-2884 Update json --- .../client-side-operations-timeout/tests/connection-churn.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client-side-operations-timeout/tests/connection-churn.json b/source/client-side-operations-timeout/tests/connection-churn.json index eb5272674d..b20c197919 100644 --- a/source/client-side-operations-timeout/tests/connection-churn.json +++ b/source/client-side-operations-timeout/tests/connection-churn.json @@ -114,8 +114,8 @@ "events": [ { "commandFailedEvent": { - } "commandName": "insert" + } } ] }, From 05cc88b8c9b9438af889d695a93171bb8ebdcb42 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 30 Oct 2024 15:59:53 -0600 Subject: [PATCH 03/31] DRIVERS-2884 Clean up spec tests --- .../tests/connection-churn.json | 498 +++++++----------- .../tests/connection-churn.yml | 369 +++++-------- 2 files changed, 317 insertions(+), 550 deletions(-) diff --git a/source/client-side-operations-timeout/tests/connection-churn.json b/source/client-side-operations-timeout/tests/connection-churn.json index b20c197919..0706e43193 100644 --- a/source/client-side-operations-timeout/tests/connection-churn.json +++ b/source/client-side-operations-timeout/tests/connection-churn.json @@ -1,12 +1,12 @@ { - "description": "operation timeouts do not cause connection churn", + "description": "Operation timeouts do not cause connection churn", "schemaVersion": "1.9", "runOnRequirements": [ { "minServerVersion": "4.4", "topologies": [ - "replicaset", - "sharded" + "standalone", + "replicaset" ] } ], @@ -16,6 +16,36 @@ "id": "failPointClient", "useMultipleMongoses": false } + }, + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1 + }, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "commandSucceededEvent", + "connectionCheckedOutEvent", + "connectionCheckedInEvent", + "connectionClosedEvent" + ] + } + }, + { + "database": { + "id": "test", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "coll", + "database": "test", + "collectionName": "coll" + } } ], "initialData": [ @@ -27,43 +57,8 @@ ], "tests": [ { - "description": "write op with successful pending read", + "description": "Write operation with successful pending read", "operations": [ - { - "name": "createEntities", - "object": "testRunner", - "arguments": { - "entities": [ - { - "client": { - "id": "client", - "uriOptions": { - "maxPoolSize": 1 - }, - "useMultipleMongoses": false, - "observeEvents": [ - "commandFailedEvent", - "connectionClosedEvent" - ] - } - }, - { - "database": { - "id": "database", - "client": "client", - "databaseName": "test" - } - }, - { - "collection": { - "id": "collection", - "database": "database", - "collectionName": "coll" - } - } - ] - } - }, { "name": "failPoint", "object": "testRunner", @@ -86,9 +81,9 @@ }, { "name": "insertOne", - "object": "collection", + "object": "coll", "arguments": { - "timeoutMS": 500, + "timeoutMS": 50, "document": { "_id": 3, "x": 1 @@ -100,7 +95,7 @@ }, { "name": "findOne", - "object": "collection", + "object": "coll", "arguments": { "filter": { "_id": 1 @@ -116,54 +111,37 @@ "commandFailedEvent": { "commandName": "insert" } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } } ] }, { "client": "client", "eventType": "cmap", - "events": [] + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] } ] }, { - "description": "write op with unsuccessful pending read", + "description": "Concurrent write operation with successful pending read", "operations": [ - { - "name": "createEntities", - "object": "testRunner", - "arguments": { - "entities": [ - { - "client": { - "id": "client", - "uriOptions": { - "maxPoolSize": 1 - }, - "useMultipleMongoses": false, - "observeEvents": [ - "commandFailedEvent", - "connectionClosedEvent" - ] - } - }, - { - "database": { - "id": "database", - "client": "client", - "databaseName": "test" - } - }, - { - "collection": { - "id": "collection", - "database": "database", - "collectionName": "coll" - } - } - ] - } - }, { "name": "failPoint", "object": "testRunner", @@ -172,7 +150,7 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 2 + "times": 1 }, "data": { "failCommands": [ @@ -184,139 +162,77 @@ } } }, - { - "name": "insertOne", - "object": "collection", - "arguments": { - "timeoutMS": 50, - "document": { - "_id": 3, - "x": 1 - } - }, - "expectError": { - "isTimeoutError": true - } - }, - { - "name": "insertOne", - "object": "collection", - "arguments": { - "timeoutMS": 50, - "document": { - "_id": 3, - "x": 1 - } - }, - "expectError": { - "isTimeoutError": true - } - } - ], - "expectEvents": [ - { - "client": "client", - "events": [ - { - "commandFailedEvent": { - "commandName": "insert" - } - } - ] - }, - { - "client": "client", - "eventType": "cmap", - "events": [ - { - "connectionClosedEvent": { - "reason": "error" - } - } - ] - } - ] - }, - { - "description": "read op with successful pending read", - "operations": [ { "name": "createEntities", "object": "testRunner", "arguments": { "entities": [ { - "client": { - "id": "client", - "uriOptions": { - "maxPoolSize": 1 - }, - "useMultipleMongoses": false, - "observeEvents": [ - "commandFailedEvent", - "connectionClosedEvent" - ] - } - }, - { - "database": { - "id": "database", - "client": "client", - "databaseName": "test" + "thread": { + "id": "thread0" } }, { - "collection": { - "id": "collection", - "database": "database", - "collectionName": "coll" + "thread": { + "id": "thread1" } } ] } }, { - "name": "failPoint", + "name": "runOnThread", "object": "testRunner", "arguments": { - "client": "failPointClient", - "failPoint": { - "configureFailPoint": "failCommand", - "mode": { - "times": 1 - }, - "data": { - "failCommands": [ - "insert" - ], - "blockConnection": true, - "blockTimeMS": 750 + "thread": "thread0", + "operation": { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 500, + "document": { + "_id": 2 + } } + }, + "expectError": { + "isTimeoutError": true } } }, { - "name": "insertOne", - "object": "collection", + "name": "waitForEvent", + "object": "testRunner", "arguments": { - "timeoutMS": 500, - "document": { - "_id": 3, - "x": 1 - } - }, - "expectError": { - "isTimeoutError": true + "client": "client", + "event": { + "connectionCheckedOutEvent": {} + }, + "count": 1 } }, { - "name": "findOne", - "object": "collection", + "name": "runOnThread", + "object": "testRunner", "arguments": { - "filter": { - "_id": 1 + "thread": "thread1", + "operation": { + "name": "insertOne", + "object": "coll", + "arguments": { + "document": { + "_id": 3 + } + } } } + }, + { + "name": "waitForThread", + "object": "testRunner", + "arguments": { + "thread": "thread1" + } } ], "expectEvents": [ @@ -327,54 +243,37 @@ "commandFailedEvent": { "commandName": "insert" } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } } ] }, { "client": "client", "eventType": "cmap", - "events": [] + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] } ] }, { - "description": "write op with unsuccessful pending read", + "description": "Write operation with unsuccessful pending read", "operations": [ - { - "name": "createEntities", - "object": "testRunner", - "arguments": { - "entities": [ - { - "client": { - "id": "client", - "uriOptions": { - "maxPoolSize": 1 - }, - "useMultipleMongoses": false, - "observeEvents": [ - "commandFailedEvent", - "connectionClosedEvent" - ] - } - }, - { - "database": { - "id": "database", - "client": "client", - "databaseName": "test" - } - }, - { - "collection": { - "id": "collection", - "database": "database", - "collectionName": "coll" - } - } - ] - } - }, { "name": "failPoint", "object": "testRunner", @@ -383,7 +282,7 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 2 + "times": 1 }, "data": { "failCommands": [ @@ -397,7 +296,7 @@ }, { "name": "insertOne", - "object": "collection", + "object": "coll", "arguments": { "timeoutMS": 50, "document": { @@ -411,9 +310,9 @@ }, { "name": "insertOne", - "object": "collection", + "object": "coll", "arguments": { - "timeoutMS": 50, + "timeoutMS": 500, "document": { "_id": 3, "x": 1 @@ -439,6 +338,12 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, { "connectionClosedEvent": { "reason": "error" @@ -449,43 +354,8 @@ ] }, { - "description": "read op with successful pending read", + "description": "Read operation with successful pending read", "operations": [ - { - "name": "createEntities", - "object": "testRunner", - "arguments": { - "entities": [ - { - "client": { - "id": "client", - "uriOptions": { - "maxPoolSize": 1 - }, - "useMultipleMongoses": false, - "observeEvents": [ - "commandFailedEvent", - "connectionClosedEvent" - ] - } - }, - { - "database": { - "id": "database", - "client": "client", - "databaseName": "test" - } - }, - { - "collection": { - "id": "collection", - "database": "database", - "collectionName": "coll" - } - } - ] - } - }, { "name": "failPoint", "object": "testRunner", @@ -498,7 +368,7 @@ }, "data": { "failCommands": [ - "insert" + "find" ], "blockConnection": true, "blockTimeMS": 750 @@ -507,13 +377,12 @@ } }, { - "name": "insertOne", - "object": "collection", + "name": "findOne", + "object": "coll", "arguments": { - "timeoutMS": 500, - "document": { - "_id": 3, - "x": 1 + "timeoutMS": 50, + "filter": { + "_id": 1 } }, "expectError": { @@ -522,7 +391,7 @@ }, { "name": "findOne", - "object": "collection", + "object": "coll", "arguments": { "filter": { "_id": 1 @@ -536,7 +405,12 @@ "events": [ { "commandFailedEvent": { - "commandName": "insert" + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" } } ] @@ -544,48 +418,26 @@ { "client": "client", "eventType": "cmap", - "events": [] + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] } ] }, { - "description": "write op with unsuccessful pending read", + "description": "Read operation with unsuccessful pending read", "operations": [ - { - "name": "createEntities", - "object": "testRunner", - "arguments": { - "entities": [ - { - "client": { - "id": "client", - "uriOptions": { - "maxPoolSize": 1 - }, - "useMultipleMongoses": false, - "observeEvents": [ - "commandFailedEvent", - "connectionClosedEvent" - ] - } - }, - { - "database": { - "id": "database", - "client": "client", - "databaseName": "test" - } - }, - { - "collection": { - "id": "collection", - "database": "database", - "collectionName": "coll" - } - } - ] - } - }, { "name": "failPoint", "object": "testRunner", @@ -594,11 +446,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 2 + "times": 1 }, "data": { "failCommands": [ - "insert" + "find" ], "blockConnection": true, "blockTimeMS": 750 @@ -607,13 +459,12 @@ } }, { - "name": "insertOne", - "object": "collection", + "name": "findOne", + "object": "coll", "arguments": { "timeoutMS": 50, - "document": { - "_id": 3, - "x": 1 + "filter": { + "_id": 1 } }, "expectError": { @@ -621,13 +472,12 @@ } }, { - "name": "insertOne", - "object": "collection", + "name": "findOne", + "object": "coll", "arguments": { - "timeoutMS": 50, - "document": { - "_id": 3, - "x": 1 + "timeoutMS": 500, + "filter": { + "_id": 1 } }, "expectError": { @@ -641,7 +491,7 @@ "events": [ { "commandFailedEvent": { - "commandName": "insert" + "commandName": "find" } } ] @@ -650,6 +500,12 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, { "connectionClosedEvent": { "reason": "error" diff --git a/source/client-side-operations-timeout/tests/connection-churn.yml b/source/client-side-operations-timeout/tests/connection-churn.yml index 47da20e4d9..92f67bc006 100644 --- a/source/client-side-operations-timeout/tests/connection-churn.yml +++ b/source/client-side-operations-timeout/tests/connection-churn.yml @@ -1,46 +1,48 @@ -description: "operation timeouts do not cause connection churn" +description: "Operation timeouts do not cause connection churn" schemaVersion: "1.9" runOnRequirements: - minServerVersion: "4.4" - topologies: ["replicaset", "sharded"] + # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired + # after maxTimeMS, whereas mongod returns it after + # max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests + # will not pass on sharded clusters. + topologies: ["standalone", "replicaset"] createEntities: - client: id: &failPointClient failPointClient useMultipleMongoses: false + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - commandSucceededEvent + - connectionCheckedOutEvent + - connectionCheckedInEvent + - connectionClosedEvent + - database: + id: &database test + client: *client + databaseName: *database + - collection: + id: &collection coll + database: *database + collectionName: *collection initialData: - - collectionName: &collectionName coll - databaseName: &databaseName test + - collectionName: *collection + databaseName: *database documents: [] tests: - - description: "write op with successful pending read" + - description: "Write operation with successful pending read" operations: - - name: createEntities - object: testRunner - arguments: - entities: - - client: - id: &client client - uriOptions: - maxPoolSize: 1 - useMultipleMongoses: false - observeEvents: - - commandFailedEvent - - connectionClosedEvent - - database: - id: &database database - client: *client - databaseName: *databaseName - - collection: - id: &collection collection - database: *database - collectionName: *collectionName - - # Create a failpoint to block first op + # Create a failpoint to block the first operation - name: failPoint object: testRunner arguments: @@ -53,17 +55,16 @@ tests: blockConnection: true blockTimeMS: 750 - # Execute op with timeout < block time + # Execute operation with timeout less than block time - name: insertOne object: *collection arguments: - timeoutMS: 500 + timeoutMS: 50 document: { _id: 3, x: 1 } expectError: isTimeoutError: true - # Execute a subsequent operation to complete the read when checking out - # the single available connection. + # Execute a subsequent operation to complete the read - name: findOne object: *collection arguments: @@ -74,175 +75,117 @@ tests: events: - commandFailedEvent: commandName: insert + - commandSucceededEvent: + commandName: find - client: *client eventType: cmap - events: [] # Expect no connection closure. + events: + - connectionCheckedOutEvent: {} # insert + - connectionCheckedInEvent: {} # insert fails + - connectionCheckedOutEvent: {} # find + - connectionCheckedInEvent: {} # find succeeds - - description: "write op with unsuccessful pending read" + - description: "Concurrent write operation with successful pending read" operations: - - name: createEntities - object: testRunner - arguments: - entities: - - client: - id: &client client - uriOptions: - maxPoolSize: 1 - useMultipleMongoses: false - observeEvents: - - commandFailedEvent - - connectionClosedEvent - - database: - id: &database database - client: *client - databaseName: *databaseName - - collection: - id: &collection collection - database: *database - collectionName: *collectionName - - # Create a failpoint to block first op + # Create a failpoint to block the first operation - name: failPoint object: testRunner arguments: client: *failPointClient failPoint: configureFailPoint: failCommand - mode: { times: 2 } + mode: { times: 1 } data: failCommands: ["insert"] blockConnection: true blockTimeMS: 750 - # Execute op with timeout < block time - - name: insertOne - object: *collection - arguments: - timeoutMS: 50 - document: { _id: 3, x: 1 } - expectError: - isTimeoutError: true - - # The pending read should fail. - - name: insertOne - object: *collection - arguments: - timeoutMS: 50 - document: { _id: 3, x: 1 } - expectError: - isTimeoutError: true - - expectEvents: - - client: *client - events: - - commandFailedEvent: - commandName: insert - - client: *client - eventType: cmap - events: - - connectionClosedEvent: - reason: error - - - description: "read op with successful pending read" - operations: + # Start threads. - name: createEntities object: testRunner arguments: entities: - - client: - id: &client client - uriOptions: - maxPoolSize: 1 - useMultipleMongoses: false - observeEvents: - - commandFailedEvent - - connectionClosedEvent - - database: - id: &database database - client: *client - databaseName: *databaseName - - collection: - id: &collection collection - database: *database - collectionName: *collectionName - - # Create a failpoint to block first op - - name: failPoint + - thread: + id: &thread0 thread0 + - thread: + id: &thread1 thread1 + + # Run an insert in two threads. We expect the first to time out and the + # second to finish the pending read from the first and complete + # successfully. + - name: runOnThread object: testRunner arguments: - client: *failPointClient - failPoint: - configureFailPoint: failCommand - mode: { times: 1 } - data: - failCommands: ["insert"] - blockConnection: true - blockTimeMS: 750 - - # Execute op with timeout < block time - - name: insertOne - object: *collection + thread: *thread0 + operation: + name: insertOne + object: *collection + arguments: + timeoutMS: 500 + document: + _id: 2 + expectError: + isTimeoutError: true + + # Ensure the first thread checks out a connection before executing the + # operation in the second thread. This maintains concurrent behavior but + # presents the worst case scenario. + - name: waitForEvent + object: testRunner arguments: - timeoutMS: 500 - document: { _id: 3, x: 1 } - expectError: - isTimeoutError: true + client: *client + event: + connectionCheckedOutEvent: {} + count: 1 - # Execute a subsequent operation to complete the read when checking out - # the single available connection. - - name: findOne - object: *collection + - name: runOnThread + object: testRunner arguments: - filter: { _id: 1 } + thread: *thread1 + operation: + name: insertOne + object: *collection + arguments: + document: + _id: 3 + + # Stop threads. + - name: waitForThread + object: testRunner + arguments: + thread: *thread1 expectEvents: - client: *client events: - commandFailedEvent: commandName: insert + - commandSucceededEvent: + commandName: insert - client: *client eventType: cmap - events: [] # Expect no connection closure. + events: + - connectionCheckedOutEvent: {} # insert + - connectionCheckedInEvent: {} # insert fails + - connectionCheckedOutEvent: {} # find + - connectionCheckedInEvent: {} # find succeeds - - description: "write op with unsuccessful pending read" + - description: "Write operation with unsuccessful pending read" operations: - - name: createEntities - object: testRunner - arguments: - entities: - - client: - id: &client client - uriOptions: - # For single-threaded drivers, ensure the operating connection - # is checked out to complete the read. - maxPoolSize: 1 - useMultipleMongoses: false - observeEvents: - - commandFailedEvent - - connectionClosedEvent - - database: - id: &database database - client: *client - databaseName: *databaseName - - collection: - id: &collection collection - database: *database - collectionName: *collectionName - - # Create a failpoint to block first op + # Create a failpoint to block the first operation - name: failPoint object: testRunner arguments: client: *failPointClient failPoint: configureFailPoint: failCommand - mode: { times: 2 } + mode: { times: 1 } data: failCommands: ["insert"] blockConnection: true blockTimeMS: 750 - # Execute op with timeout < block time + # Execute operation with timeout less than block time - name: insertOne object: *collection arguments: @@ -251,11 +194,11 @@ tests: expectError: isTimeoutError: true - # The pending read should fail. + # The pending read should fail - name: insertOne object: *collection arguments: - timeoutMS: 50 + timeoutMS: 500 document: { _id: 3, x: 1 } expectError: isTimeoutError: true @@ -265,36 +208,19 @@ tests: events: - commandFailedEvent: commandName: insert + # No second failed event since we timed out attempting to check out + # the connection for the second operation - client: *client eventType: cmap - events: - - connectionClosedEvent: + events: + - connectionCheckedOutEvent: {} # first insert + - connectionCheckedInEvent: {} # first insert fails + - connectionClosedEvent: # second insert times out pending read in checkout, closes reason: error - - description: "read op with successful pending read" + - description: "Read operation with successful pending read" operations: - - name: createEntities - object: testRunner - arguments: - entities: - - client: - id: &client client - uriOptions: - maxPoolSize: 1 - useMultipleMongoses: false - observeEvents: - - commandFailedEvent - - connectionClosedEvent - - database: - id: &database database - client: *client - databaseName: *databaseName - - collection: - id: &collection collection - database: *database - collectionName: *collectionName - - # Create a failpoint to block first op + # Create a failpoint to block the first operation - name: failPoint object: testRunner arguments: @@ -303,21 +229,20 @@ tests: configureFailPoint: failCommand mode: { times: 1 } data: - failCommands: ["insert"] + failCommands: ["find"] blockConnection: true blockTimeMS: 750 - # Execute op with timeout < block time - - name: insertOne + # Execute operation with timeout less than block time + - name: findOne object: *collection arguments: - timeoutMS: 500 - document: { _id: 3, x: 1 } + timeoutMS: 50 + filter: { _id: 1 } expectError: isTimeoutError: true - # Execute a subsequent operation to complete the read when checking out - # the single available connection. + # Execute a subsequent operation to complete the read - name: findOne object: *collection arguments: @@ -327,64 +252,47 @@ tests: - client: *client events: - commandFailedEvent: - commandName: insert + commandName: find + - commandSucceededEvent: + commandName: find - client: *client eventType: cmap - events: [] # Expect no connection closure. + events: + - connectionCheckedOutEvent: {} # first find + - connectionCheckedInEvent: {} # first find fails + - connectionCheckedOutEvent: {} # second find + - connectionCheckedInEvent: {} # second find succeeds - - description: "write op with unsuccessful pending read" + - description: "Read operation with unsuccessful pending read" operations: - - name: createEntities - object: testRunner - arguments: - entities: - - client: - id: &client client - uriOptions: - # For single-threaded drivers, ensure the operating connection - # is checked out to complete the read. - maxPoolSize: 1 - useMultipleMongoses: false - observeEvents: - - commandFailedEvent - - connectionClosedEvent - - database: - id: &database database - client: *client - databaseName: *databaseName - - collection: - id: &collection collection - database: *database - collectionName: *collectionName - - # Create a failpoint to block first op + # Create a failpoint to block the first operation - name: failPoint object: testRunner arguments: client: *failPointClient failPoint: configureFailPoint: failCommand - mode: { times: 2 } + mode: { times: 1 } data: - failCommands: ["insert"] + failCommands: ["find"] blockConnection: true blockTimeMS: 750 - # Execute op with timeout < block time - - name: insertOne + # Execute operation with timeout less than block time + - name: findOne object: *collection arguments: timeoutMS: 50 - document: { _id: 3, x: 1 } + filter: { _id: 1 } expectError: isTimeoutError: true - # The pending read should fail. - - name: insertOne + # The pending read should fail + - name: findOne object: *collection arguments: - timeoutMS: 50 - document: { _id: 3, x: 1 } + timeoutMS: 500 + filter: { _id: 1 } expectError: isTimeoutError: true @@ -392,10 +300,13 @@ tests: - client: *client events: - commandFailedEvent: - commandName: insert + commandName: find + # No second failed event since we timed out attempting to check out + # the connection for the second operation - client: *client eventType: cmap - events: - - connectionClosedEvent: + events: + - connectionCheckedOutEvent: {} # first find + - connectionCheckedInEvent: {} # first find fails + - connectionClosedEvent: # second find times out pending read in checkout, closes reason: error - From 98c2a73f520f7da3fdd8ce34ac442d4f59573f59 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 30 Oct 2024 16:46:47 -0600 Subject: [PATCH 04/31] Update CMAP to include foreground read --- .../connection-monitoring-and-pooling.md | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index bb8a454b13..c1121f5b69 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -574,6 +574,51 @@ other threads from checking out [Connections](#connection) while establishing a Before a given [Connection](#connection) is returned from checkOut, it must be marked as "in use", and the pool's availableConnectionCount MUST be decremented. +If an operation times out the socket while awaiting a server response, the driver MUST mark the connection as "pending." +When this connection is next checked out the driver MUST attempt to complete the read. This process should continue +until the response is successfully read or the cumulative `pendingResponseTimeoutLimit` of 400ms is reached. If the `pendingResponseTimeoutLimit` is reached, the connection MUST be closed. + +```python +def await_pending_read(pool, conn): + # If there are no bytes pending read, do nothing + if conn.await_remaining_bytes is None: + return None + + size = conn.await_remaining_bytes + check_in = False + + try: + start_time = current_time() + + if size == 0: + size_buf = conn.read_bytes(4) + if size_buf is None: + conn.remaining_time_for_pending_read -= time_since(start_time) + check_in = True + raise Exception("Error reading the message size") + + size = conn.parse_message_size(size_buf) - 4 + + bytes_read = conn.discard_bytes(size) + if bytes_read < size: + # If the read times out, record the bytes left to read before exiting + if bytes_read == 0 and conn.is_timeout_error(): + conn.await_remaining_bytes += size + conn.remaining_for_pending_read -= time_since(start_time) + + check_in = True + raise Exception(f"Error discarding {size} byte message") + + conn.await_remaining_bytes = None + conn.remaining_time = None + + finally: + if conn.remaining_time is not None and conn.remaining_time < 0: + conn.close() + elif check_in: + pool.check_in(conn) +``` + ```python connection = Null tConnectionCheckOutStarted = current instant (use a monotonic clock if possible) @@ -631,6 +676,11 @@ if connection state is "pending": decrement pendingConnectionCount else: decrement availableConnectionCount + +error = await_pending_read(pool, connection) +if error: + return error + set connection state to "in use" # If there is no background thread, the pool MUST ensure that From 48279955377e3db00bca40cae55fbf1873327e41 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 30 Oct 2024 16:49:07 -0600 Subject: [PATCH 05/31] Update changelog --- .../connection-monitoring-and-pooling.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index c1121f5b69..7c074e65c1 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1424,8 +1424,6 @@ to close and remove from its pool a [Connection](#connection) which has unread e ## Changelog -- 2024-01-23: Migrated from reStructuredText to Markdown. - - 2019-06-06: Add "connectionError" as a valid reason for ConnectionCheckOutFailedEvent - 2020-09-03: Clarify Connection states and definition. Require the use of a background thread and/or async I/O. Add @@ -1457,4 +1455,8 @@ to close and remove from its pool a [Connection](#connection) which has unread e - 2023-10-04: Commit to the currently specified requirements regarding durations in events. +- 2024-01-23: Migrated from reStructuredText to Markdown. + +- 2024-10-30: Attempt pending reads during connection check-out for sockets that have timed out. + ______________________________________________________________________ From 234b729be07412a99d05f431916be32143dee722 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 30 Oct 2024 16:55:58 -0600 Subject: [PATCH 06/31] Add justification for CMAP update --- .../connection-monitoring-and-pooling.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 7c074e65c1..ae20e645a9 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -576,7 +576,9 @@ availableConnectionCount MUST be decremented. If an operation times out the socket while awaiting a server response, the driver MUST mark the connection as "pending." When this connection is next checked out the driver MUST attempt to complete the read. This process should continue -until the response is successfully read or the cumulative `pendingResponseTimeoutLimit` of 400ms is reached. If the `pendingResponseTimeoutLimit` is reached, the connection MUST be closed. +until the response is successfully read or the cumulative `pendingResponseTimeoutLimit` of 400ms is reached. If the `pendingResponseTimeoutLimit` is reached, the connection MUST be closed. The goal of this procedure is to minimize +connection churn by attempting to empty server responses under CSOT conditions (e.g. a socket timeout) rather than +strictly closing a connection. ```python def await_pending_read(pool, conn): From ccfbcf145a246f0faa868ea89b8c6ad88f73984d Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 30 Oct 2024 16:57:21 -0600 Subject: [PATCH 07/31] Remove unecessary example --- .../connection-monitoring-and-pooling.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index ae20e645a9..2f87798170 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -577,8 +577,8 @@ availableConnectionCount MUST be decremented. If an operation times out the socket while awaiting a server response, the driver MUST mark the connection as "pending." When this connection is next checked out the driver MUST attempt to complete the read. This process should continue until the response is successfully read or the cumulative `pendingResponseTimeoutLimit` of 400ms is reached. If the `pendingResponseTimeoutLimit` is reached, the connection MUST be closed. The goal of this procedure is to minimize -connection churn by attempting to empty server responses under CSOT conditions (e.g. a socket timeout) rather than -strictly closing a connection. +connection churn by attempting to empty server responses under CSOT conditions rather than strictly closing a +connection. ```python def await_pending_read(pool, conn): From fed567bcb3af82e6aa590e1d765b884c4a747b20 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 30 Oct 2024 17:05:04 -0600 Subject: [PATCH 08/31] Use consistent keys --- .../connection-monitoring-and-pooling.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 2f87798170..28fc643869 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -612,10 +612,10 @@ def await_pending_read(pool, conn): raise Exception(f"Error discarding {size} byte message") conn.await_remaining_bytes = None - conn.remaining_time = None + conn.remaining_time_for_pending_read = None finally: - if conn.remaining_time is not None and conn.remaining_time < 0: + if conn.remaining_time_for_pending_read is not None and conn.remaining_time_for_pending_read < 0: conn.close() elif check_in: pool.check_in(conn) From 8840be4bc4e64e86aac9a88f0281c9dd79d1ae85 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Thu, 7 Nov 2024 14:16:53 -0700 Subject: [PATCH 09/31] Update timeouts --- .../tests/connection-churn.json | 8 ++++---- .../tests/connection-churn.yml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/source/client-side-operations-timeout/tests/connection-churn.json b/source/client-side-operations-timeout/tests/connection-churn.json index 0706e43193..1f9c9f4cf0 100644 --- a/source/client-side-operations-timeout/tests/connection-churn.json +++ b/source/client-side-operations-timeout/tests/connection-churn.json @@ -289,7 +289,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 750 + "blockTimeMS": 1100 } } } @@ -312,7 +312,7 @@ "name": "insertOne", "object": "coll", "arguments": { - "timeoutMS": 500, + "timeoutMS": 1000, "document": { "_id": 3, "x": 1 @@ -453,7 +453,7 @@ "find" ], "blockConnection": true, - "blockTimeMS": 750 + "blockTimeMS": 1100 } } } @@ -475,7 +475,7 @@ "name": "findOne", "object": "coll", "arguments": { - "timeoutMS": 500, + "timeoutMS": 1000, "filter": { "_id": 1 } diff --git a/source/client-side-operations-timeout/tests/connection-churn.yml b/source/client-side-operations-timeout/tests/connection-churn.yml index 92f67bc006..12fca8e548 100644 --- a/source/client-side-operations-timeout/tests/connection-churn.yml +++ b/source/client-side-operations-timeout/tests/connection-churn.yml @@ -183,7 +183,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 750 + blockTimeMS: 1100 # Execute operation with timeout less than block time - name: insertOne @@ -198,7 +198,7 @@ tests: - name: insertOne object: *collection arguments: - timeoutMS: 500 + timeoutMS: 1000 document: { _id: 3, x: 1 } expectError: isTimeoutError: true @@ -276,7 +276,7 @@ tests: data: failCommands: ["find"] blockConnection: true - blockTimeMS: 750 + blockTimeMS: 1100 # Execute operation with timeout less than block time - name: findOne @@ -291,7 +291,7 @@ tests: - name: findOne object: *collection arguments: - timeoutMS: 500 + timeoutMS: 1000 filter: { _id: 1 } expectError: isTimeoutError: true From c0e5aee5caa7f0f4e0d9f70fbaaa803e857deba2 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Mon, 21 Apr 2025 18:13:41 -0600 Subject: [PATCH 10/31] DRIVERS-2884 Update pending response unified spec tests --- ...ction-churn.json => pending-response.json} | 289 ++++++++---------- ...nection-churn.yml => pending-response.yml} | 254 ++++++++------- 2 files changed, 256 insertions(+), 287 deletions(-) rename source/client-side-operations-timeout/tests/{connection-churn.json => pending-response.json} (74%) rename source/client-side-operations-timeout/tests/{connection-churn.yml => pending-response.yml} (53%) diff --git a/source/client-side-operations-timeout/tests/connection-churn.json b/source/client-side-operations-timeout/tests/pending-response.json similarity index 74% rename from source/client-side-operations-timeout/tests/connection-churn.json rename to source/client-side-operations-timeout/tests/pending-response.json index 1f9c9f4cf0..d623a25b6e 100644 --- a/source/client-side-operations-timeout/tests/connection-churn.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -1,11 +1,11 @@ { - "description": "Operation timeouts do not cause connection churn", + "description": "Connection churn is prevented by reading pending responses during connection checkout", "schemaVersion": "1.9", "runOnRequirements": [ { "minServerVersion": "4.4", "topologies": [ - "standalone", + "single", "replicaset" ] } @@ -29,7 +29,10 @@ "commandSucceededEvent", "connectionCheckedOutEvent", "connectionCheckedInEvent", - "connectionClosedEvent" + "connectionClosedEvent", + "connectionPendingReadSucceeded", + "connectionPendingReadStarted", + "connectionPendingReadFailed" ] } }, @@ -57,8 +60,18 @@ ], "tests": [ { - "description": "Write operation with successful pending read", + "description": "non-timeout write op with response and no error", "operations": [ + { + "name": "runCommand", + "object": "test", + "command_name": "ping", + "arguments": { + "command": { + "ping": 1 + } + } + }, { "name": "failPoint", "object": "testRunner", @@ -74,7 +87,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 750 + "blockTimeMS": 100 } } } @@ -83,7 +96,7 @@ "name": "insertOne", "object": "coll", "arguments": { - "timeoutMS": 50, + "timeoutMS": 75, "document": { "_id": 3, "x": 1 @@ -107,6 +120,11 @@ { "client": "client", "events": [ + { + "commandSucceededEvent": { + "commandName": "ping" + } + }, { "commandFailedEvent": { "commandName": "insert" @@ -134,132 +152,12 @@ }, { "connectionCheckedInEvent": {} - } - ] - } - ] - }, - { - "description": "Concurrent write operation with successful pending read", - "operations": [ - { - "name": "failPoint", - "object": "testRunner", - "arguments": { - "client": "failPointClient", - "failPoint": { - "configureFailPoint": "failCommand", - "mode": { - "times": 1 - }, - "data": { - "failCommands": [ - "insert" - ], - "blockConnection": true, - "blockTimeMS": 750 - } - } - } - }, - { - "name": "createEntities", - "object": "testRunner", - "arguments": { - "entities": [ - { - "thread": { - "id": "thread0" - } - }, - { - "thread": { - "id": "thread1" - } - } - ] - } - }, - { - "name": "runOnThread", - "object": "testRunner", - "arguments": { - "thread": "thread0", - "operation": { - "name": "insertOne", - "object": "coll", - "arguments": { - "timeoutMS": 500, - "document": { - "_id": 2 - } - } - }, - "expectError": { - "isTimeoutError": true - } - } - }, - { - "name": "waitForEvent", - "object": "testRunner", - "arguments": { - "client": "client", - "event": { - "connectionCheckedOutEvent": {} - }, - "count": 1 - } - }, - { - "name": "runOnThread", - "object": "testRunner", - "arguments": { - "thread": "thread1", - "operation": { - "name": "insertOne", - "object": "coll", - "arguments": { - "document": { - "_id": 3 - } - } - } - } - }, - { - "name": "waitForThread", - "object": "testRunner", - "arguments": { - "thread": "thread1" - } - } - ], - "expectEvents": [ - { - "client": "client", - "events": [ - { - "commandFailedEvent": { - "commandName": "insert" - } }, { - "commandSucceededEvent": { - "commandName": "insert" - } - } - ] - }, - { - "client": "client", - "eventType": "cmap", - "events": [ - { - "connectionCheckedOutEvent": {} + "connectionPendingReadStarted": {} }, { - "connectionCheckedInEvent": {} + "connectionPendingReadSucceeded": {} }, { "connectionCheckedOutEvent": {} @@ -272,8 +170,18 @@ ] }, { - "description": "Write operation with unsuccessful pending read", + "description": "non-timeout op with no response", "operations": [ + { + "name": "runCommand", + "object": "test", + "command_name": "ping", + "arguments": { + "command": { + "ping": 1 + } + } + }, { "name": "failPoint", "object": "testRunner", @@ -289,7 +197,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 1100 + "blockTimeMS": 3100 } } } @@ -309,13 +217,11 @@ } }, { - "name": "insertOne", + "name": "findOne", "object": "coll", "arguments": { - "timeoutMS": 1000, - "document": { - "_id": 3, - "x": 1 + "filter": { + "_id": 1 } }, "expectError": { @@ -327,6 +233,11 @@ { "client": "client", "events": [ + { + "commandSucceededEvent": { + "commandName": "ping" + } + }, { "commandFailedEvent": { "commandName": "insert" @@ -344,6 +255,18 @@ { "connectionCheckedInEvent": {} }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingReadStarted": {} + }, + { + "connectionPendingReadFailed": {} + }, { "connectionClosedEvent": { "reason": "error" @@ -354,8 +277,18 @@ ] }, { - "description": "Read operation with successful pending read", + "description": "timeout write op with response and no error", "operations": [ + { + "name": "runCommand", + "object": "test", + "command_name": "ping", + "arguments": { + "command": { + "ping": 1 + } + } + }, { "name": "failPoint", "object": "testRunner", @@ -368,21 +301,22 @@ }, "data": { "failCommands": [ - "find" + "insert" ], "blockConnection": true, - "blockTimeMS": 750 + "blockTimeMS": 150 } } } }, { - "name": "findOne", + "name": "insertOne", "object": "coll", "arguments": { - "timeoutMS": 50, - "filter": { - "_id": 1 + "timeoutMS": 75, + "document": { + "_id": 3, + "x": 1 } }, "expectError": { @@ -393,9 +327,13 @@ "name": "findOne", "object": "coll", "arguments": { + "timeoutMS": 50, "filter": { "_id": 1 } + }, + "expectError": { + "isTimeoutError": true } } ], @@ -404,13 +342,13 @@ "client": "client", "events": [ { - "commandFailedEvent": { - "commandName": "find" + "commandSucceededEvent": { + "commandName": "ping" } }, { - "commandSucceededEvent": { - "commandName": "find" + "commandFailedEvent": { + "commandName": "insert" } } ] @@ -430,14 +368,30 @@ }, { "connectionCheckedInEvent": {} + }, + { + "connectionPendingReadStarted": {} + }, + { + "connectionPendingReadFailed": {} } ] } ] }, { - "description": "Read operation with unsuccessful pending read", + "description": "timeout write op exceeds pending response timeout", "operations": [ + { + "name": "runCommand", + "object": "test", + "command_name": "ping", + "arguments": { + "command": { + "ping": 1 + } + } + }, { "name": "failPoint", "object": "testRunner", @@ -450,32 +404,40 @@ }, "data": { "failCommands": [ - "find" + "insert" ], "blockConnection": true, - "blockTimeMS": 1100 + "blockTimeMS": 150 } } } }, { - "name": "findOne", + "name": "insertOne", "object": "coll", "arguments": { - "timeoutMS": 50, - "filter": { - "_id": 1 + "timeoutMS": 75, + "document": { + "_id": 3, + "x": 1 } }, "expectError": { "isTimeoutError": true } }, + { + "name": "wait", + "object": "testRunner", + "arguments": { + "ms": 3000 + } + }, { "name": "findOne", "object": "coll", "arguments": { - "timeoutMS": 1000, + "timeoutMS": 50, "filter": { "_id": 1 } @@ -489,9 +451,14 @@ { "client": "client", "events": [ + { + "commandSucceededEvent": { + "commandName": "ping" + } + }, { "commandFailedEvent": { - "commandName": "find" + "commandName": "insert" } } ] @@ -506,6 +473,18 @@ { "connectionCheckedInEvent": {} }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingReadStarted": {} + }, + { + "connectionPendingReadFailed": {} + }, { "connectionClosedEvent": { "reason": "error" diff --git a/source/client-side-operations-timeout/tests/connection-churn.yml b/source/client-side-operations-timeout/tests/pending-response.yml similarity index 53% rename from source/client-side-operations-timeout/tests/connection-churn.yml rename to source/client-side-operations-timeout/tests/pending-response.yml index 12fca8e548..e504c4bcc6 100644 --- a/source/client-side-operations-timeout/tests/connection-churn.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -1,14 +1,14 @@ -description: "Operation timeouts do not cause connection churn" +description: "Connection churn is prevented by reading pending responses during connection checkout" schemaVersion: "1.9" runOnRequirements: - minServerVersion: "4.4" - # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired - # after maxTimeMS, whereas mongod returns it after - # max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests + # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired + # after maxTimeMS, whereas mongod returns it after + # max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests # will not pass on sharded clusters. - topologies: ["standalone", "replicaset"] + topologies: ["single", "replicaset"] createEntities: - client: @@ -25,6 +25,9 @@ createEntities: - connectionCheckedOutEvent - connectionCheckedInEvent - connectionClosedEvent + - connectionPendingReadSucceeded + - connectionPendingReadStarted + - connectionPendingReadFailed - database: id: &database test client: *client @@ -40,8 +43,19 @@ initialData: documents: [] tests: - - description: "Write operation with successful pending read" + # Attempting pending response read on a non-timeout operation that can + # immediately read from the TCP buffer should complete the pending read and + # the connection should be checked out. + - description: "non-timeout write op with response and no error" operations: + # Run a ping command to pre-load the pool with a connection. + - name: runCommand + object: *database + command_name: ping + arguments: + command: + ping: 1 + # Create a failpoint to block the first operation - name: failPoint object: testRunner @@ -53,13 +67,13 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 750 + blockTimeMS: 100 # Execute operation with timeout less than block time - name: insertOne object: *collection arguments: - timeoutMS: 50 + timeoutMS: 75 document: { _id: 3, x: 1 } expectError: isTimeoutError: true @@ -73,6 +87,8 @@ tests: expectEvents: - client: *client events: + - commandSucceededEvent: + commandName: ping # pre-loading the connection pool. - commandFailedEvent: commandName: insert - commandSucceededEvent: @@ -80,98 +96,27 @@ tests: - client: *client eventType: cmap events: - - connectionCheckedOutEvent: {} # insert - - connectionCheckedInEvent: {} # insert fails - - connectionCheckedOutEvent: {} # find - - connectionCheckedInEvent: {} # find succeeds + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # ping finishes + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # insert fails + - connectionPendingReadStarted: {} + - connectionPendingReadSucceeded: {} # find op drains conn + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # find succeeds - - description: "Concurrent write operation with successful pending read" + # Attempting pending response read on a non-timeout operation that gets no + # response from the server after 3s should close the connection. + - description: "non-timeout op with no response" operations: - # Create a failpoint to block the first operation - - name: failPoint - object: testRunner + # Run a ping command to pre-load the pool with a connection. + - name: runCommand + object: *database + command_name: ping arguments: - client: *failPointClient - failPoint: - configureFailPoint: failCommand - mode: { times: 1 } - data: - failCommands: ["insert"] - blockConnection: true - blockTimeMS: 750 - - # Start threads. - - name: createEntities - object: testRunner - arguments: - entities: - - thread: - id: &thread0 thread0 - - thread: - id: &thread1 thread1 + command: + ping: 1 - # Run an insert in two threads. We expect the first to time out and the - # second to finish the pending read from the first and complete - # successfully. - - name: runOnThread - object: testRunner - arguments: - thread: *thread0 - operation: - name: insertOne - object: *collection - arguments: - timeoutMS: 500 - document: - _id: 2 - expectError: - isTimeoutError: true - - # Ensure the first thread checks out a connection before executing the - # operation in the second thread. This maintains concurrent behavior but - # presents the worst case scenario. - - name: waitForEvent - object: testRunner - arguments: - client: *client - event: - connectionCheckedOutEvent: {} - count: 1 - - - name: runOnThread - object: testRunner - arguments: - thread: *thread1 - operation: - name: insertOne - object: *collection - arguments: - document: - _id: 3 - - # Stop threads. - - name: waitForThread - object: testRunner - arguments: - thread: *thread1 - - expectEvents: - - client: *client - events: - - commandFailedEvent: - commandName: insert - - commandSucceededEvent: - commandName: insert - - client: *client - eventType: cmap - events: - - connectionCheckedOutEvent: {} # insert - - connectionCheckedInEvent: {} # insert fails - - connectionCheckedOutEvent: {} # find - - connectionCheckedInEvent: {} # find succeeds - - - description: "Write operation with unsuccessful pending read" - operations: # Create a failpoint to block the first operation - name: failPoint object: testRunner @@ -183,7 +128,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 1100 + blockTimeMS: 3100 # Execute operation with timeout less than block time - name: insertOne @@ -194,32 +139,49 @@ tests: expectError: isTimeoutError: true - # The pending read should fail - - name: insertOne + # Execute a subsequent operation to complete the read + - name: findOne object: *collection arguments: - timeoutMS: 1000 - document: { _id: 3, x: 1 } + filter: { _id: 1 } expectError: isTimeoutError: true expectEvents: - client: *client events: + - commandSucceededEvent: + commandName: ping # pre-loading the connection pool. - commandFailedEvent: commandName: insert - # No second failed event since we timed out attempting to check out + # No second failed event since we timed out attempting to check out # the connection for the second operation - client: *client eventType: cmap events: - - connectionCheckedOutEvent: {} # first insert - - connectionCheckedInEvent: {} # first insert fails - - connectionClosedEvent: # second insert times out pending read in checkout, closes + + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # ping finishes + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # insert fails + - connectionPendingReadStarted: {} + - connectionPendingReadFailed: {} + - connectionClosedEvent: reason: error - - description: "Read operation with successful pending read" + # Attempting pending response read on a realistic timeout operation that can + # immediately read from the TCP buffer should complete the pending read and + # the connection should be checked out. + - description: "timeout write op with response and no error" operations: + # Run a ping command to pre-load the pool with a connection. + - name: runCommand + object: *database + command_name: ping + arguments: + command: + ping: 1 + # Create a failpoint to block the first operation - name: failPoint object: testRunner @@ -229,16 +191,16 @@ tests: configureFailPoint: failCommand mode: { times: 1 } data: - failCommands: ["find"] + failCommands: ["insert"] blockConnection: true - blockTimeMS: 750 + blockTimeMS: 150 # Execute operation with timeout less than block time - - name: findOne + - name: insertOne object: *collection arguments: - timeoutMS: 50 - filter: { _id: 1 } + timeoutMS: 75 + document: { _id: 3, x: 1 } expectError: isTimeoutError: true @@ -246,25 +208,41 @@ tests: - name: findOne object: *collection arguments: + timeoutMS: 50 filter: { _id: 1 } + expectError: + isTimeoutError: true expectEvents: - client: *client events: - - commandFailedEvent: - commandName: find - commandSucceededEvent: - commandName: find + commandName: ping # pre-loading the connection pool. + - commandFailedEvent: + commandName: insert - client: *client eventType: cmap events: - - connectionCheckedOutEvent: {} # first find - - connectionCheckedInEvent: {} # first find fails - - connectionCheckedOutEvent: {} # second find - - connectionCheckedInEvent: {} # second find succeeds + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # ping finishes + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # insert fails + - connectionPendingReadStarted: {} + - connectionPendingReadFailed: {} # find times out - - description: "Read operation with unsuccessful pending read" + # Attempting pending response read on a realistic timeout operation that gets + # no response from the server after the pending response start time has been + # exceeded should close. + - description: "timeout write op exceeds pending response timeout" operations: + # Run a ping command to pre-load the pool with a connection. + - name: runCommand + object: *database + command_name: ping + arguments: + command: + ping: 1 + # Create a failpoint to block the first operation - name: failPoint object: testRunner @@ -274,24 +252,31 @@ tests: configureFailPoint: failCommand mode: { times: 1 } data: - failCommands: ["find"] + failCommands: ["insert"] blockConnection: true - blockTimeMS: 1100 + blockTimeMS: 150 # Execute operation with timeout less than block time - - name: findOne + - name: insertOne object: *collection arguments: - timeoutMS: 50 - filter: { _id: 1 } + timeoutMS: 75 + document: { _id: 3, x: 1 } expectError: isTimeoutError: true - # The pending read should fail + # Wait for the pending read timeout period before executing the next + # operation. + - name: wait + object: testRunner + arguments: + ms: 3000 + + # Execute a subsequent operation to complete the read - name: findOne object: *collection arguments: - timeoutMS: 1000 + timeoutMS: 50 filter: { _id: 1 } expectError: isTimeoutError: true @@ -299,14 +284,19 @@ tests: expectEvents: - client: *client events: + - commandSucceededEvent: + commandName: ping # pre-loading the connection pool. - commandFailedEvent: - commandName: find - # No second failed event since we timed out attempting to check out - # the connection for the second operation + commandName: insert - client: *client eventType: cmap events: - - connectionCheckedOutEvent: {} # first find - - connectionCheckedInEvent: {} # first find fails - - connectionClosedEvent: # second find times out pending read in checkout, closes + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # ping finishes + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # insert fails + - connectionPendingReadStarted: {} + - connectionPendingReadFailed: {} # find times out + - connectionClosedEvent: reason: error + From dde9e2220fd65198e0467b6314f8d98b285b25cc Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Tue, 22 Apr 2025 20:08:48 -0600 Subject: [PATCH 11/31] DRIVERS-2884 Add UML and update wording --- .../connection-monitoring-and-pooling.md | 142 ++++++++++++------ 1 file changed, 97 insertions(+), 45 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 11c9049f1b..99eb43d917 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -576,53 +576,105 @@ other threads from checking out [Connections](#connection) while establishing a Before a given [Connection](#connection) is returned from checkOut, it must be marked as "in use", and the pool's availableConnectionCount MUST be decremented. -If an operation times out the socket while awaiting a server response, the driver MUST mark the connection as "pending." -When this connection is next checked out the driver MUST attempt to complete the read. This process should continue -until the response is successfully read or the cumulative `pendingResponseTimeoutLimit` of 400ms is reached. If the `pendingResponseTimeoutLimit` is reached, the connection MUST be closed. The goal of this procedure is to minimize -connection churn by attempting to empty server responses under CSOT conditions rather than strictly closing a -connection. - +If an operation times out the socket while awaiting a server response and CSOT is enabled and maxTimeMS was added to the +command, the driver MUST mark the connection as "pending" and record the current time in a way that can be updated. The +next time the connection is checked out, the driver MUST attempt to read and discard the remaining response from the +socket. + +When performing a pending response read: + +- The connection MUST persist the timestamp recorded immediately after the original socket timeout, and this timestamp +MUST be updated to the current time whenever any data is successfully read from the socket during a pending response read +attempt. +- If the connection remains idle (i.e., no data is read) for more than 3 seconds since the pending state began or since +the last successful read, the driver MUST attempt to verify the connection's health by either performing a non-blocking +read or using the minimal possible timeout to check if at least one byte can be read. +- If a user-provided timeout is specified for the pending response read, the driver MUST use the minimum of the +remaining time before the 3-second pending-response window elapses and the user-provided timeout as the effective +timeout for the read operation. +- If no user-provided timeout is specified, the driver MUST use the minimum of the remaining 3-second pending-response +window and the socketTimeoutMS (if supported by the driver) as the effective timeout for the read operation. +- If reading from the socket results in an error that is not a timeout, or if the connection exceeds the 3-second +pending-response window, the driver MUST close the connection. +- If the pending response is fully read and successfully discarded, and the connection remains healthy, the pending +state may be cleared and the connection MAY be returned to the pool for reuse. + +```mermaid +sequenceDiagram + participant Driver + participant Pool + participant Conn as Connection (*) + participant Server + + Driver->>Pool: Checkout Connection (*) + Pool->>Driver: Return connection (*) + + Driver->>Conn: Send operation (1) (CSOT enabled, maxTimeMS > 0) + Conn->>Server: Send command + Server-->>Conn: (No response, socket times out) + Conn->>Conn: Mark as "pending", record current time + Conn-->>Driver: Error + + Driver->>Pool: Checkout Connection (*) + Pool->>Driver: Return connection (*) + + Driver->>Conn: Send operation (2) + + + Conn->>Server: Attempt to discard pending response from operation (1) + Conn->>Conn: Update pending read timestamp if bytes read + + alt Timeout window exceeded or non-timeout error + Conn->>Conn: Close connection + Conn-->>Driver: Error + else Timeout window not exceeded + alt Error + Conn->>Conn: Clear pending read state + Conn->>Conn: Reset to current time + Conn->>Pool: Check connection back into pool + Conn-->>Driver: Error + else No error + Conn->>Conn: Clear pending read state + Conn->>Driver: Return connection to execute operation + end + end +``` ```python -def await_pending_read(pool, conn): - # If there are no bytes pending read, do nothing - if conn.await_remaining_bytes is None: - return None - - size = conn.await_remaining_bytes - check_in = False - - try: - start_time = current_time() - - if size == 0: - size_buf = conn.read_bytes(4) - if size_buf is None: - conn.remaining_time_for_pending_read -= time_since(start_time) - check_in = True - raise Exception("Error reading the message size") - - size = conn.parse_message_size(size_buf) - 4 - - bytes_read = conn.discard_bytes(size) - if bytes_read < size: - # If the read times out, record the bytes left to read before exiting - if bytes_read == 0 and conn.is_timeout_error(): - conn.await_remaining_bytes += size - conn.remaining_for_pending_read -= time_since(start_time) - - check_in = True - raise Exception(f"Error discarding {size} byte message") - - conn.await_remaining_bytes = None - conn.remaining_time_for_pending_read = None - - finally: - if conn.remaining_time_for_pending_read is not None and conn.remaining_time_for_pending_read < 0: - conn.close() - elif check_in: - pool.check_in(conn) +PENDING_RESPONSE_TIMEOUT_MS = 3000 # static timeout + +def await_pending_response(timeout, conn): + # Note: conn.pending_start is initialized after the original socket timeout + # and not in this function since the connection will sit in the pool for some + # non-deterministic amount of time after the socket timeout. + + remaining_time = (conn.pending_start + PENDING_RESPONSE_TIMEOUT_MS) - current_time() + if remaining_time <= 0: + # Use the smallest timeout (or enable non-blocking read). + remaining_time = 0.001 + + if timeout is None: + timeout = min(remaining_time, conn.socket_timeout_ms) + else: + timeout = min(remaining_time, timeout) + + data, error = execute_pending_response(timeout, conn) + end_time = current_time() + + if error is not None and error is not timeout: + close_connection(conn) + raise + + if len(data) > 0: + # Refresh the remaining time upon a successful read + conn.pending_start = end_time + + # Check if the remaining time has been exceeded + if end_time - conn.pending_start >= PENDING_RESPONSE_TIMEOUT_MS: + close_connection(conn) + + if error is not None: + raise error ``` - ```python ```text connection = Null From 5e0305adcf3935134fe0096604ec271aa0c45326 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Tue, 22 Apr 2025 20:11:15 -0600 Subject: [PATCH 12/31] DRIVERS-2884 Remove uneeded text from code snippet --- .../connection-monitoring-and-pooling.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 99eb43d917..9b5ddba2cd 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -676,7 +676,6 @@ def await_pending_response(timeout, conn): raise error ``` ```python -```text connection = Null tConnectionCheckOutStarted = current instant (use a monotonic clock if possible) emit ConnectionCheckOutStartedEvent and equivalent log message @@ -734,7 +733,7 @@ if connection state is "pending": else: decrement availableConnectionCount -error = await_pending_read(pool, connection) +error = await_pending_response(pool, connection) if error: return error From 496724c4f3b61acd75c59bae8b4caf4ac015c078 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 23 Apr 2025 15:28:03 -0600 Subject: [PATCH 13/31] DRIVERS-2884 Add prose tests --- .../tests/README.md | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index 3d8aee40aa..c08d7e25fc 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -21,6 +21,47 @@ The following tests have not yet been automated, but MUST still be tested: 5. When a check out attempt fails because connection set up throws an error, assert that a ConnectionCheckOutFailedEvent with reason="connectionError" is emitted. +### Pending Response: Connection Aliveness Tests + +If a connection with a pending response is idle for > 3 seconds, then drivers are expected to perform an aliveness check +by attempting a non-blocking read of 1 byte from the inbound TCP buffer. The following two cases test both a successful +read and a failed one. + +#### Connection Aliveness Check Fails + +1. Initialize a mock TCP listener to simulate the server-side behavior. The listener should write at least 5 bytes to +the connection to prevent size-related errors (e.g. `0x01, 0x02, 0x03, 0x04, 0x05, 0xFF`). The response should be +delayed by 2x the size of the socket timeout. +2. Implement a monitoring mechanism to capture the `ConnectionPendingReadFailed` event +3. Instantiate a connection pool using the mock listener’s address, ensuring readiness without error. Attach the event +monitor to observe the connection’s state. +4. Check out a connection from the pool and initiate a read operation with an appropriate socket timeout (e.g, 10ms) +that will trigger a timeout due to the artificial delay of 2x the socket timeout (step 1). Ensure that the read +operation returns a timeout error. +5. Check the connection back into the pool and sleep for 3 seconds so that the pending response state timestamp exceeds +the pending response timeout, forcing an aliveness check. +6. Check the connection out. The aliveness check should fail since no additional bytes were added after the delay in +step 1. + + +#### Connection Aliveness Check Succeeds + +1. Initialize a mock TCP listener to simulate the server-side behavior. The listener should write at least 5 bytes to +the connection to prevent size-related errors (e.g. `0x01, 0x02, 0x03, 0x04, 0x05, 0xFF`). The response should be +delayed by 2x the size of the socket timeout. Write at least 1 additional byte after the delay so that the aliveness +check succeeds (e.g. `0xAA`). +2. Implement a monitoring mechanism to capture the `ConnectionPendingReadFailed` event +3. Instantiate a connection pool using the mock listener’s address, ensuring readiness without error. Attach the event +monitor to observe the connection’s state. +4. Check out a connection from the pool and initiate a read operation with an appropriate socket timeout (e.g, 10ms) +that will trigger a timeout due to the artificial delay of 2x the socket timeout (step 1). Ensure that the read +operation returns a timeout error. +5. Check the connection back into the pool and sleep for 3 seconds so that the pending response state timestamp exceeds +the pending response timeout, forcing an aliveness check. +6. Check the connection out. The aliveness check should succeed since no additional bytes were added after the delay in +step 1. + + ## Logging Tests Tests for connection pool logging can be found in the `/logging` subdirectory and are written in the From 258edf86f7eeac9fa8aec8dda1412339f9faaf05 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 23 Apr 2025 15:53:31 -0600 Subject: [PATCH 14/31] DRIVERS-2884 Clean up presentation --- .../connection-monitoring-and-pooling.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 9b5ddba2cd..eceddd42fb 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -576,14 +576,12 @@ other threads from checking out [Connections](#connection) while establishing a Before a given [Connection](#connection) is returned from checkOut, it must be marked as "in use", and the pool's availableConnectionCount MUST be decremented. -If an operation times out the socket while awaiting a server response and CSOT is enabled and maxTimeMS was added to the -command, the driver MUST mark the connection as "pending" and record the current time in a way that can be updated. The -next time the connection is checked out, the driver MUST attempt to read and discard the remaining response from the -socket. +If an operation times out the socket while awaiting a server response and CSOT is enabled and `maxTimeMS` was added to +the command, the driver MUST mark the connection as "pending" and record the current time in a way that can be updated. +The next time the connection is checked out, the driver MUST attempt to read and discard the remaining response from +the socket. The workflow for this is as follows: -When performing a pending response read: - -- The connection MUST persist the timestamp recorded immediately after the original socket timeout, and this timestamp +- The connection MUST persist the current time recorded immediately after the original socket timeout, and this timestamp MUST be updated to the current time whenever any data is successfully read from the socket during a pending response read attempt. - If the connection remains idle (i.e., no data is read) for more than 3 seconds since the pending state began or since From d217d1074ec0dba7c368cb3d7e7e05dbd94efbc0 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Thu, 24 Apr 2025 16:16:04 -0600 Subject: [PATCH 15/31] DRIVERS-2884 Add logs and events --- .../connection-monitoring-and-pooling.md | 123 ++++++++++++++++++ .../tests/README.md | 10 +- 2 files changed, 131 insertions(+), 2 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index eceddd42fb..2dd0543ff3 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1099,6 +1099,79 @@ interface ConnectionCheckedInEvent { */ connectionId: int64; } + +/** + * Emitted when the connection being checked out is attempting to read and + * discard a pending server response. + */ +interface PendingResponseStarted { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestID: int64; +} + +/** + * Emitted when the connection successfully read the pending read and is ready + * to be checked out. + */ +interface PendingResponseSucceeded { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The time it took to complete the pending read. + */ + duration: Duration; +} + +/** + * Emitted when the connection being checked out failed to complete the pending + * read. + */ +interface PendingResponseFailed { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestID: int64; + + /** + * Time in milliseconds remaining for the next pending read attempt. + */ + msRemaining: int64; + + /** + * The reason for why the pending read failed. + */ + reason: string; +} ``` ### Connection Pool Logging @@ -1296,6 +1369,56 @@ placeholders as appropriate: > Connection checked in: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}} +#### Connection Pending Response Started + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +|---------------------|----------------|-----------------------------------------------| +| message | string | "Pending response started" | +| driverConnectionID | int64 | The driver-generated ID for the connection | +| requestID | int64 | The driver-generated request ID associated with the network timeout | + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, +> request ID={{requestID}} + +#### Connection Pending Response Succeeded + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +|---------------------|----------------|--------------------------------------------| +| message | string | "Pending response succeeded" | +| driverConnectionID | int64 | The driver-generated ID for the connection | +| durationMS | Int32/Int64/Double | The time it took to complete the pending read | + + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, +> DurationMS={{durationMS}} ms + +#### Connection Pending Response Failed + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +|---------------------|----------------|--------------------------------------------------------------| +| message | string | "Pending response failed" | +| driverConnectionID | int64 | The driver-generated ID for the connection | +| remainingtimeMS | int64 | Remaining time for the next pending response read attempt | +| reason | string | The reason for why the pending response read failed | + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, +> remaining time={{remainingtimeMS}} ms, reason={{reason}} + ### Connection Pool Errors A connection pool throws errors in specific circumstances. These Errors MUST be emitted by the pool. Errors SHOULD be diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index c08d7e25fc..49134dc98c 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -32,7 +32,8 @@ read and a failed one. 1. Initialize a mock TCP listener to simulate the server-side behavior. The listener should write at least 5 bytes to the connection to prevent size-related errors (e.g. `0x01, 0x02, 0x03, 0x04, 0x05, 0xFF`). The response should be delayed by 2x the size of the socket timeout. -2. Implement a monitoring mechanism to capture the `ConnectionPendingReadFailed` event +2. Implement a monitoring mechanism to capture the `ConnectionPendingResponseStarted` and `ConnectionPendingResponseFailed` +events. 3. Instantiate a connection pool using the mock listener’s address, ensuring readiness without error. Attach the event monitor to observe the connection’s state. 4. Check out a connection from the pool and initiate a read operation with an appropriate socket timeout (e.g, 10ms) @@ -42,6 +43,8 @@ operation returns a timeout error. the pending response timeout, forcing an aliveness check. 6. Check the connection out. The aliveness check should fail since no additional bytes were added after the delay in step 1. +7. Verify that one event for each `ConnectionPendingResponseStarted` and `ConnectionPendingResponseFailed` was emitted. +Also verify that the fields were correctly set for each event. #### Connection Aliveness Check Succeeds @@ -50,7 +53,8 @@ step 1. the connection to prevent size-related errors (e.g. `0x01, 0x02, 0x03, 0x04, 0x05, 0xFF`). The response should be delayed by 2x the size of the socket timeout. Write at least 1 additional byte after the delay so that the aliveness check succeeds (e.g. `0xAA`). -2. Implement a monitoring mechanism to capture the `ConnectionPendingReadFailed` event +2. Implement a monitoring mechanism to capture the `ConnectionPendingResponseStarted` and `ConnectionPendingResponseSucceeded` +events. 3. Instantiate a connection pool using the mock listener’s address, ensuring readiness without error. Attach the event monitor to observe the connection’s state. 4. Check out a connection from the pool and initiate a read operation with an appropriate socket timeout (e.g, 10ms) @@ -60,6 +64,8 @@ operation returns a timeout error. the pending response timeout, forcing an aliveness check. 6. Check the connection out. The aliveness check should succeed since no additional bytes were added after the delay in step 1. +7. Verify that one event for each `ConnectionPendingResponseStarted` and `ConnectionPendingResponseSucceeded` was emitted. +Also verify that the fields were correctly set for each event. ## Logging Tests From cc8aec0119bd43fe411a4bb83e37a67ae6b0b129 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Thu, 24 Apr 2025 17:28:31 -0600 Subject: [PATCH 16/31] DRIVERS-2884 Add log part --- .../connection-monitoring-and-pooling.md | 14 +- .../tests/logging/connection-logging.json | 541 ++++++++++++++++++ .../tests/logging/connection-logging.yml | 234 ++++++++ 3 files changed, 782 insertions(+), 7 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 2dd0543ff3..ff110f7b8e 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1140,6 +1140,11 @@ interface PendingResponseSucceeded { * The time it took to complete the pending read. */ duration: Duration; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestID: int64; } /** @@ -1162,11 +1167,6 @@ interface PendingResponseFailed { */ requestID: int64; - /** - * Time in milliseconds remaining for the next pending read attempt. - */ - msRemaining: int64; - /** * The reason for why the pending read failed. */ @@ -1393,6 +1393,7 @@ In addition to the common fields defined above, this message MUST contain the fo |---------------------|----------------|--------------------------------------------| | message | string | "Pending response succeeded" | | driverConnectionID | int64 | The driver-generated ID for the connection | +| requestID | int64 | The driver-generated request ID associated with the network timeout | | durationMS | Int32/Int64/Double | The time it took to complete the pending read | @@ -1410,14 +1411,13 @@ In addition to the common fields defined above, this message MUST contain the fo |---------------------|----------------|--------------------------------------------------------------| | message | string | "Pending response failed" | | driverConnectionID | int64 | The driver-generated ID for the connection | -| remainingtimeMS | int64 | Remaining time for the next pending response read attempt | | reason | string | The reason for why the pending response read failed | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: > Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, -> remaining time={{remainingtimeMS}} ms, reason={{reason}} +> reason={{reason}} ### Connection Pool Errors diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.json b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.json index 72103b3cab..f2a0734744 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.json +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.json @@ -518,6 +518,547 @@ ] } ] + }, + { + "description": "force a pending response read, fail first try, succeed second try", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "retryReads": false, + "appname": "clientAppName", + "heartbeatFrequencyMS": 10000 + }, + "observeLogMessages": { + "connection": "debug" + } + } + }, + { + "database": { + "id": "test", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "coll", + "database": "test", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "runCommand", + "object": "test", + "command_name": "ping", + "arguments": { + "command": { + "ping": 1 + } + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 200 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "findOne", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + }, + "timeoutMS": 100 + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "wait", + "object": "testRunner", + "arguments": { + "ms": 75 + } + }, + { + "name": "findOne", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectLogMessages": [ + { + "client": "client", + "ignoreExtraMessages": true, + "messages": [ + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection pool created", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection pool ready", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection created", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection ready", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response failed", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + }, + "reason": "timeout" + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response succeeded", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + } + ] + } + ] } ] } diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml index 49868a062e..74a233b3a7 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml @@ -222,3 +222,237 @@ tests: reason: "An error occurred while trying to establish a new connection" error: { $$exists: true } durationMS: { $$type: [double, int, long] } + + - description: "force a pending response read, fail first try, succeed second try" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + retryReads: false + appname: &clientAppName clientAppName + # use a high heartbeatFrequencyMS to avoid a successful monitor check marking the pool as + # ready (and emitting another event) during the course of test execution. + heartbeatFrequencyMS: 10000 + observeLogMessages: + connection: debug + - database: + id: &database test + client: *client + databaseName: *database + - collection: + id: &collection coll + database: *database + collectionName: *collection + + # Run a ping command to pre-load the pool with a connection. + - name: runCommand + object: *database + command_name: ping + arguments: + command: + ping: 1 + + # Create a failpoint to block the first operation + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 1 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 200 + + # Execute operation with timeout less than block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # Execute a subsequent operation which should time out during the + # pending response read attempt. + - name: findOne + object: *collection + arguments: + filter: { _id: 1 } + timeoutMS: 100 + expectError: + isTimeoutError: true + + # Wait for the blocked connection to free up. + - name: wait + # operation. + object: testRunner + arguments: + ms: 75 + + # Execute a subsequent operation which should completed the pending read. + - name: findOne + object: *collection + arguments: + filter: { _id: 1 } + + expectLogMessages: + - client: *client + ignoreExtraMessages: true + messages: + - level: debug + component: connection + data: + message: "Connection pool created" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection pool ready" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # Ping + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection created" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection ready" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # Insert + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # First Find + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response failed" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + reason: "timeout" + + # Second Find + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response succeeded" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } From 3d98039c14cbe4bcd0b4835e7074dbc26f333e09 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 13:27:25 -0600 Subject: [PATCH 17/31] DRIVERS-2884 Add Q&A section --- .../connection-monitoring-and-pooling.md | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index ff110f7b8e..100062a567 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1564,6 +1564,34 @@ some equivalent configuration, but this configuration will also require target f 5.0. The advantage of using Background Thread to manage perished connections is that it will work regardless of environment setup. +### Why is pending response read timeout not calculated dynamically? Why is it specifically 3 seconds? + +Using a dynamic timeout introduces additional complexity. In particular, RTT is an unreliable metric for predicting +future operation latency as both server and network conditions are unpredictable. Benchmarks demonstrate that even in +slow, high-latency scenarios (e.g. reading a 16MiB document over cross-country), reads reliably complete in under 1 +second. The 3-second timeout ensures that every realistic pending response read will not result in premature connection +closure, but will still close in pathological conditions (e.g., a dead server or true network outage). + +### Why is the pending response read timeout not configurable? + +Because of the ["no knobs" mantra](https://github.com/mongodb/specifications/blob/master/source/driver-mantras.md#no-knobs). +We can always reconsider this in the future + +### Why does the pending response read timeout include the time the connection is idle in the pool? + +The pending response read timeout includes idle time in the pool so that stale or unusable connections (e.g., if the +socket is dead) are detected and closed promptly instead of incurring an additional 3-second wait upon checkout. By +tracking the timeout during idle periods, we ensure that the driver can quickly determine if the connection should be +closed with a fast, non-blocking check as soon as it’s checked out, avoiding introducing unnecessary latency while still +protecting connection availability. This approach maintains a balance between minimizing connection churn and ensuring +users don't encounter avoidable delays. + +### Why do we refresh the pending response read timeout each time we successfully read bytes from the TCP stream? + +By refreshing the timeout after each successful read, we acknowledge that progress is being made, and we provide a new +time window for the next segment of data to arrive. Refreshing is less costly than re-establishing a connection since +there is no reason to believe that a new connection would reduce latency. + ## Backwards Compatibility As mentioned in [Deprecated Options](#deprecated-options), some drivers currently implement the options `waitQueueSize` From 07e75bde6b58112fafabd02cc899b4cfc06066b1 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 13:34:24 -0600 Subject: [PATCH 18/31] DRIVERS-2884 Add changelog --- .../connection-monitoring-and-pooling.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 100062a567..f89fdbe86b 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1667,8 +1667,7 @@ to close and remove from its pool a [Connection](#connection) which has unread e - 2023-10-04: Commit to the currently specified requirements regarding durations in events. -- 2024-01-23: Migrated from reStructuredText to Markdown. - -- 2024-10-30: Attempt pending reads during connection check-out for sockets that have timed out. +- Improved connection reuse and availability by allowing pending server responses to be read with a 3-second static +timeout after a client-side operation timeout. ______________________________________________________________________ From 8d9e71b17e0aeeb1b5de5c0775dd9a723f02adcc Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 13:44:46 -0600 Subject: [PATCH 19/31] DRIVERS-2884 Fix Markdown failures --- .../connection-monitoring-and-pooling.md | 88 ++++++++++--------- .../tests/README.md | 48 +++++----- 2 files changed, 68 insertions(+), 68 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index f89fdbe86b..ef8919c08e 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -578,24 +578,24 @@ availableConnectionCount MUST be decremented. If an operation times out the socket while awaiting a server response and CSOT is enabled and `maxTimeMS` was added to the command, the driver MUST mark the connection as "pending" and record the current time in a way that can be updated. -The next time the connection is checked out, the driver MUST attempt to read and discard the remaining response from -the socket. The workflow for this is as follows: - -- The connection MUST persist the current time recorded immediately after the original socket timeout, and this timestamp -MUST be updated to the current time whenever any data is successfully read from the socket during a pending response read -attempt. -- If the connection remains idle (i.e., no data is read) for more than 3 seconds since the pending state began or since -the last successful read, the driver MUST attempt to verify the connection's health by either performing a non-blocking -read or using the minimal possible timeout to check if at least one byte can be read. -- If a user-provided timeout is specified for the pending response read, the driver MUST use the minimum of the -remaining time before the 3-second pending-response window elapses and the user-provided timeout as the effective -timeout for the read operation. -- If no user-provided timeout is specified, the driver MUST use the minimum of the remaining 3-second pending-response -window and the socketTimeoutMS (if supported by the driver) as the effective timeout for the read operation. -- If reading from the socket results in an error that is not a timeout, or if the connection exceeds the 3-second -pending-response window, the driver MUST close the connection. -- If the pending response is fully read and successfully discarded, and the connection remains healthy, the pending -state may be cleared and the connection MAY be returned to the pool for reuse. +The next time the connection is checked out, the driver MUST attempt to read and discard the remaining response from the +socket. The workflow for this is as follows: + +- The connection MUST persist the current time recorded immediately after the original socket timeout, and this + timestamp MUST be updated to the current time whenever any data is successfully read from the socket during a + pending response read attempt. +- If the connection remains idle (i.e., no data is read) for more than 3 seconds since the pending state began or since + the last successful read, the driver MUST attempt to verify the connection's health by either performing a + non-blocking read or using the minimal possible timeout to check if at least one byte can be read. +- If a user-provided timeout is specified for the pending response read, the driver MUST use the minimum of the + remaining time before the 3-second pending-response window elapses and the user-provided timeout as the effective + timeout for the read operation. +- If no user-provided timeout is specified, the driver MUST use the minimum of the remaining 3-second pending-response + window and the socketTimeoutMS (if supported by the driver) as the effective timeout for the read operation. +- If reading from the socket results in an error that is not a timeout, or if the connection exceeds the 3-second + pending-response window, the driver MUST close the connection. +- If the pending response is fully read and successfully discarded, and the connection remains healthy, the pending + state may be cleared and the connection MAY be returned to the pool for reuse. ```mermaid sequenceDiagram @@ -637,7 +637,8 @@ sequenceDiagram end end ``` -```python + +```python PENDING_RESPONSE_TIMEOUT_MS = 3000 # static timeout def await_pending_response(timeout, conn): @@ -673,6 +674,7 @@ def await_pending_response(timeout, conn): if error is not None: raise error ``` + ```python connection = Null tConnectionCheckOutStarted = current instant (use a monotonic clock if possible) @@ -1373,29 +1375,28 @@ placeholders as appropriate: In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -|---------------------|----------------|-----------------------------------------------| -| message | string | "Pending response started" | -| driverConnectionID | int64 | The driver-generated ID for the connection | -| requestID | int64 | The driver-generated request ID associated with the network timeout | +| Key | Suggested Type | Value | +| ------------------ | -------------- | ------------------------------------------------------------------- | +| message | string | "Pending response started" | +| driverConnectionID | int64 | The driver-generated ID for the connection | +| requestID | int64 | The driver-generated request ID associated with the network timeout | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: -> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, -> request ID={{requestID}} +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request +> ID={{requestID}} #### Connection Pending Response Succeeded In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -|---------------------|----------------|--------------------------------------------| -| message | string | "Pending response succeeded" | -| driverConnectionID | int64 | The driver-generated ID for the connection | -| requestID | int64 | The driver-generated request ID associated with the network timeout | -| durationMS | Int32/Int64/Double | The time it took to complete the pending read | - +| Key | Suggested Type | Value | +| ------------------ | ------------------ | ------------------------------------------------------------------- | +| message | string | "Pending response succeeded" | +| driverConnectionID | int64 | The driver-generated ID for the connection | +| requestID | int64 | The driver-generated request ID associated with the network timeout | +| durationMS | Int32/Int64/Double | The time it took to complete the pending read | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: @@ -1407,11 +1408,11 @@ placeholders as appropriate: In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -|---------------------|----------------|--------------------------------------------------------------| -| message | string | "Pending response failed" | -| driverConnectionID | int64 | The driver-generated ID for the connection | -| reason | string | The reason for why the pending response read failed | +| Key | Suggested Type | Value | +| ------------------ | -------------- | --------------------------------------------------- | +| message | string | "Pending response failed" | +| driverConnectionID | int64 | The driver-generated ID for the connection | +| reason | string | The reason for why the pending response read failed | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: @@ -1569,13 +1570,14 @@ environment setup. Using a dynamic timeout introduces additional complexity. In particular, RTT is an unreliable metric for predicting future operation latency as both server and network conditions are unpredictable. Benchmarks demonstrate that even in slow, high-latency scenarios (e.g. reading a 16MiB document over cross-country), reads reliably complete in under 1 -second. The 3-second timeout ensures that every realistic pending response read will not result in premature connection +second. The 3-second timeout ensures that every realistic pending response read will not result in premature connection closure, but will still close in pathological conditions (e.g., a dead server or true network outage). ### Why is the pending response read timeout not configurable? -Because of the ["no knobs" mantra](https://github.com/mongodb/specifications/blob/master/source/driver-mantras.md#no-knobs). -We can always reconsider this in the future +Because of the +["no knobs" mantra](https://github.com/mongodb/specifications/blob/master/source/driver-mantras.md#no-knobs). We can +always reconsider this in the future ### Why does the pending response read timeout include the time the connection is idle in the pool? @@ -1667,7 +1669,7 @@ to close and remove from its pool a [Connection](#connection) which has unread e - 2023-10-04: Commit to the currently specified requirements regarding durations in events. -- Improved connection reuse and availability by allowing pending server responses to be read with a 3-second static -timeout after a client-side operation timeout. +- Improved connection reuse and availability by allowing pending server responses to be read with a 3-second static + timeout after a client-side operation timeout. ______________________________________________________________________ diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index 49134dc98c..df4f45edc9 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -30,43 +30,41 @@ read and a failed one. #### Connection Aliveness Check Fails 1. Initialize a mock TCP listener to simulate the server-side behavior. The listener should write at least 5 bytes to -the connection to prevent size-related errors (e.g. `0x01, 0x02, 0x03, 0x04, 0x05, 0xFF`). The response should be -delayed by 2x the size of the socket timeout. -2. Implement a monitoring mechanism to capture the `ConnectionPendingResponseStarted` and `ConnectionPendingResponseFailed` -events. + the connection to prevent size-related errors (e.g. `0x01, 0x02, 0x03, 0x04, 0x05, 0xFF`). The response should be + delayed by 2x the size of the socket timeout. +2. Implement a monitoring mechanism to capture the `ConnectionPendingResponseStarted` and + `ConnectionPendingResponseFailed` events. 3. Instantiate a connection pool using the mock listener’s address, ensuring readiness without error. Attach the event -monitor to observe the connection’s state. + monitor to observe the connection’s state. 4. Check out a connection from the pool and initiate a read operation with an appropriate socket timeout (e.g, 10ms) -that will trigger a timeout due to the artificial delay of 2x the socket timeout (step 1). Ensure that the read -operation returns a timeout error. + that will trigger a timeout due to the artificial delay of 2x the socket timeout (step 1). Ensure that the read + operation returns a timeout error. 5. Check the connection back into the pool and sleep for 3 seconds so that the pending response state timestamp exceeds -the pending response timeout, forcing an aliveness check. + the pending response timeout, forcing an aliveness check. 6. Check the connection out. The aliveness check should fail since no additional bytes were added after the delay in -step 1. + step 1. 7. Verify that one event for each `ConnectionPendingResponseStarted` and `ConnectionPendingResponseFailed` was emitted. -Also verify that the fields were correctly set for each event. + Also verify that the fields were correctly set for each event. - -#### Connection Aliveness Check Succeeds +#### Connection Aliveness Check Succeeds 1. Initialize a mock TCP listener to simulate the server-side behavior. The listener should write at least 5 bytes to -the connection to prevent size-related errors (e.g. `0x01, 0x02, 0x03, 0x04, 0x05, 0xFF`). The response should be -delayed by 2x the size of the socket timeout. Write at least 1 additional byte after the delay so that the aliveness -check succeeds (e.g. `0xAA`). -2. Implement a monitoring mechanism to capture the `ConnectionPendingResponseStarted` and `ConnectionPendingResponseSucceeded` -events. + the connection to prevent size-related errors (e.g. `0x01, 0x02, 0x03, 0x04, 0x05, 0xFF`). The response should be + delayed by 2x the size of the socket timeout. Write at least 1 additional byte after the delay so that the + aliveness check succeeds (e.g. `0xAA`). +2. Implement a monitoring mechanism to capture the `ConnectionPendingResponseStarted` and + `ConnectionPendingResponseSucceeded` events. 3. Instantiate a connection pool using the mock listener’s address, ensuring readiness without error. Attach the event -monitor to observe the connection’s state. + monitor to observe the connection’s state. 4. Check out a connection from the pool and initiate a read operation with an appropriate socket timeout (e.g, 10ms) -that will trigger a timeout due to the artificial delay of 2x the socket timeout (step 1). Ensure that the read -operation returns a timeout error. + that will trigger a timeout due to the artificial delay of 2x the socket timeout (step 1). Ensure that the read + operation returns a timeout error. 5. Check the connection back into the pool and sleep for 3 seconds so that the pending response state timestamp exceeds -the pending response timeout, forcing an aliveness check. + the pending response timeout, forcing an aliveness check. 6. Check the connection out. The aliveness check should succeed since no additional bytes were added after the delay in -step 1. -7. Verify that one event for each `ConnectionPendingResponseStarted` and `ConnectionPendingResponseSucceeded` was emitted. -Also verify that the fields were correctly set for each event. - + step 1. +7. Verify that one event for each `ConnectionPendingResponseStarted` and `ConnectionPendingResponseSucceeded` was + emitted. Also verify that the fields were correctly set for each event. ## Logging Tests From 5c68f779a9d127bdafaa450cd37199ef3d84b251 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 15:12:09 -0600 Subject: [PATCH 20/31] DRIVERS-2884 Update schema --- .../tests/pending-response.json | 144 +- .../tests/pending-response.yml | 100 +- source/unified-test-format/schema-1.24.json | 1162 +++++++++++++++++ source/unified-test-format/tests/Makefile | 2 +- .../unified-test-format.md | 28 +- 5 files changed, 1227 insertions(+), 209 deletions(-) create mode 100644 source/unified-test-format/schema-1.24.json diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index d623a25b6e..266461d08b 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -1,6 +1,6 @@ { "description": "Connection churn is prevented by reading pending responses during connection checkout", - "schemaVersion": "1.9", + "schemaVersion": "1.24", "runOnRequirements": [ { "minServerVersion": "4.4", @@ -30,9 +30,9 @@ "connectionCheckedOutEvent", "connectionCheckedInEvent", "connectionClosedEvent", - "connectionPendingReadSucceeded", - "connectionPendingReadStarted", - "connectionPendingReadFailed" + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed" ] } }, @@ -65,7 +65,6 @@ { "name": "runCommand", "object": "test", - "command_name": "ping", "arguments": { "command": { "ping": 1 @@ -154,10 +153,10 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingReadStarted": {} + "connectionPendingResponseStarted": {} }, { - "connectionPendingReadSucceeded": {} + "connectionPendingResponseSucceeded": {} }, { "connectionCheckedOutEvent": {} @@ -262,10 +261,12 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingReadStarted": {} + "connectionPendingResponseStarted": {} }, { - "connectionPendingReadFailed": {} + "connectionPendingResponseFailed": { + "reason": "timeout" + } }, { "connectionClosedEvent": { @@ -304,7 +305,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 150 + "blockTimeMS": 250 } } } @@ -327,13 +328,10 @@ "name": "findOne", "object": "coll", "arguments": { - "timeoutMS": 50, + "timeoutMS": 200, "filter": { "_id": 1 } - }, - "expectError": { - "isTimeoutError": true } } ], @@ -350,115 +348,10 @@ "commandFailedEvent": { "commandName": "insert" } - } - ] - }, - { - "client": "client", - "eventType": "cmap", - "events": [ - { - "connectionCheckedOutEvent": {} }, - { - "connectionCheckedInEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - }, - { - "connectionPendingReadStarted": {} - }, - { - "connectionPendingReadFailed": {} - } - ] - } - ] - }, - { - "description": "timeout write op exceeds pending response timeout", - "operations": [ - { - "name": "runCommand", - "object": "test", - "command_name": "ping", - "arguments": { - "command": { - "ping": 1 - } - } - }, - { - "name": "failPoint", - "object": "testRunner", - "arguments": { - "client": "failPointClient", - "failPoint": { - "configureFailPoint": "failCommand", - "mode": { - "times": 1 - }, - "data": { - "failCommands": [ - "insert" - ], - "blockConnection": true, - "blockTimeMS": 150 - } - } - } - }, - { - "name": "insertOne", - "object": "coll", - "arguments": { - "timeoutMS": 75, - "document": { - "_id": 3, - "x": 1 - } - }, - "expectError": { - "isTimeoutError": true - } - }, - { - "name": "wait", - "object": "testRunner", - "arguments": { - "ms": 3000 - } - }, - { - "name": "findOne", - "object": "coll", - "arguments": { - "timeoutMS": 50, - "filter": { - "_id": 1 - } - }, - "expectError": { - "isTimeoutError": true - } - } - ], - "expectEvents": [ - { - "client": "client", - "events": [ { "commandSucceededEvent": { - "commandName": "ping" - } - }, - { - "commandFailedEvent": { - "commandName": "insert" + "commandName": "find" } } ] @@ -480,15 +373,16 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingReadStarted": {} + "connectionPendingResponseStarted": {} }, { - "connectionPendingReadFailed": {} + "connectionPendingResponseSucceeded": {} }, { - "connectionClosedEvent": { - "reason": "error" - } + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} } ] } diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index e504c4bcc6..cec6e5eed8 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -1,6 +1,6 @@ description: "Connection churn is prevented by reading pending responses during connection checkout" -schemaVersion: "1.9" +schemaVersion: "1.24" runOnRequirements: - minServerVersion: "4.4" @@ -25,9 +25,9 @@ createEntities: - connectionCheckedOutEvent - connectionCheckedInEvent - connectionClosedEvent - - connectionPendingReadSucceeded - - connectionPendingReadStarted - - connectionPendingReadFailed + - connectionPendingResponseSucceeded + - connectionPendingResponseStarted + - connectionPendingResponseFailed - database: id: &database test client: *client @@ -51,7 +51,6 @@ tests: # Run a ping command to pre-load the pool with a connection. - name: runCommand object: *database - command_name: ping arguments: command: ping: 1 @@ -100,8 +99,8 @@ tests: - connectionCheckedInEvent: {} # ping finishes - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # insert fails - - connectionPendingReadStarted: {} - - connectionPendingReadSucceeded: {} # find op drains conn + - connectionPendingResponseStarted: {} + - connectionPendingResponseSucceeded: {} # find op drains conn - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # find succeeds @@ -164,8 +163,9 @@ tests: - connectionCheckedInEvent: {} # ping finishes - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # insert fails - - connectionPendingReadStarted: {} - - connectionPendingReadFailed: {} + - connectionPendingResponseStarted: {} + - connectionPendingResponseFailed: + reason: timeout - connectionClosedEvent: reason: error @@ -193,7 +193,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 150 + blockTimeMS: 250 # Execute operation with timeout less than block time - name: insertOne @@ -208,10 +208,8 @@ tests: - name: findOne object: *collection arguments: - timeoutMS: 50 + timeoutMS: 200 filter: { _id: 1 } - expectError: - isTimeoutError: true expectEvents: - client: *client @@ -220,6 +218,8 @@ tests: commandName: ping # pre-loading the connection pool. - commandFailedEvent: commandName: insert + - commandSucceededEvent: + commandName: find - client: *client eventType: cmap events: @@ -227,76 +227,12 @@ tests: - connectionCheckedInEvent: {} # ping finishes - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # insert fails - - connectionPendingReadStarted: {} - - connectionPendingReadFailed: {} # find times out + - connectionPendingResponseStarted: {} + - connectionPendingResponseSucceeded: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # find succeeds # Attempting pending response read on a realistic timeout operation that gets - # no response from the server after the pending response start time has been + # no response from the server after the pending response start time has been # exceeded should close. - - description: "timeout write op exceeds pending response timeout" - operations: - # Run a ping command to pre-load the pool with a connection. - - name: runCommand - object: *database - command_name: ping - arguments: - command: - ping: 1 - - # Create a failpoint to block the first operation - - name: failPoint - object: testRunner - arguments: - client: *failPointClient - failPoint: - configureFailPoint: failCommand - mode: { times: 1 } - data: - failCommands: ["insert"] - blockConnection: true - blockTimeMS: 150 - - # Execute operation with timeout less than block time - - name: insertOne - object: *collection - arguments: - timeoutMS: 75 - document: { _id: 3, x: 1 } - expectError: - isTimeoutError: true - - # Wait for the pending read timeout period before executing the next - # operation. - - name: wait - object: testRunner - arguments: - ms: 3000 - - # Execute a subsequent operation to complete the read - - name: findOne - object: *collection - arguments: - timeoutMS: 50 - filter: { _id: 1 } - expectError: - isTimeoutError: true - - expectEvents: - - client: *client - events: - - commandSucceededEvent: - commandName: ping # pre-loading the connection pool. - - commandFailedEvent: - commandName: insert - - client: *client - eventType: cmap - events: - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # ping finishes - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # insert fails - - connectionPendingReadStarted: {} - - connectionPendingReadFailed: {} # find times out - - connectionClosedEvent: - reason: error diff --git a/source/unified-test-format/schema-1.24.json b/source/unified-test-format/schema-1.24.json new file mode 100644 index 0000000000..a2d92c5d03 --- /dev/null +++ b/source/unified-test-format/schema-1.24.json @@ -0,0 +1,1162 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Unified Test Format", + "type": "object", + "additionalProperties": false, + "required": [ + "description", + "schemaVersion", + "tests" + ], + "properties": { + "description": { + "type": "string" + }, + "schemaVersion": { + "$ref": "#/definitions/version" + }, + "runOnRequirements": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/runOnRequirement" + } + }, + "createEntities": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/entity" + } + }, + "initialData": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/collectionData" + } + }, + "tests": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/test" + } + }, + "_yamlAnchors": { + "type": "object", + "additionalProperties": true + } + }, + "definitions": { + "version": { + "type": "string", + "pattern": "^[0-9]+(\\.[0-9]+){1,2}$" + }, + "runOnRequirement": { + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "properties": { + "maxServerVersion": { + "$ref": "#/definitions/version" + }, + "minServerVersion": { + "$ref": "#/definitions/version" + }, + "topologies": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "single", + "replicaset", + "sharded", + "sharded-replicaset", + "load-balanced" + ] + } + }, + "serverless": { + "type": "string", + "enum": [ + "require", + "forbid", + "allow" + ] + }, + "serverParameters": { + "type": "object", + "minProperties": 1 + }, + "auth": { + "type": "boolean" + }, + "authMechanism": { + "type": "string" + }, + "csfle": { + "type": "boolean" + } + } + }, + "entity": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "client": { + "type": "object", + "additionalProperties": false, + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + }, + "uriOptions": { + "type": "object" + }, + "useMultipleMongoses": { + "type": "boolean" + }, + "observeEvents": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent", + "poolCreatedEvent", + "poolReadyEvent", + "poolClearedEvent", + "poolClosedEvent", + "connectionCreatedEvent", + "connectionReadyEvent", + "connectionClosedEvent", + "connectionCheckOutStartedEvent", + "connectionCheckOutFailedEvent", + "connectionCheckedOutEvent", + "connectionCheckedInEvent", + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed", + "serverDescriptionChangedEvent", + "topologyDescriptionChangedEvent" + ] + } + }, + "ignoreCommandMonitoringEvents": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "storeEventsAsEntities": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/storeEventsAsEntity" + } + }, + "observeLogMessages": { + "type": "object", + "minProperties": 1, + "additionalProperties": false, + "properties": { + "command": { + "$ref": "#/definitions/logSeverityLevel" + }, + "topology": { + "$ref": "#/definitions/logSeverityLevel" + }, + "serverSelection": { + "$ref": "#/definitions/logSeverityLevel" + }, + "connection": { + "$ref": "#/definitions/logSeverityLevel" + } + } + }, + "serverApi": { + "$ref": "#/definitions/serverApi" + }, + "observeSensitiveCommands": { + "type": "boolean" + }, + "autoEncryptOpts": { + "type": "object", + "additionalProperties": false, + "required": [ + "keyVaultNamespace", + "kmsProviders" + ], + "properties": { + "keyVaultNamespace": { + "type": "string" + }, + "bypassAutoEncryption": { + "type": "boolean" + }, + "kmsProviders": { + "$ref": "#/definitions/kmsProviders" + }, + "schemaMap": { + "type": "object", + "additionalProperties": { + "type": "object" + } + }, + "extraOptions": { + "type": "object" + }, + "encryptedFieldsMap": { + "type": "object", + "additionalProperties": { + "type": "object" + } + }, + "bypassQueryAnalysis": { + "type": "boolean" + }, + "keyExpirationMS": { + "type": "integer" + } + } + } + } + }, + "clientEncryption": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "clientEncryptionOpts" + ], + "properties": { + "id": { + "type": "string" + }, + "clientEncryptionOpts": { + "$ref": "#/definitions/clientEncryptionOpts" + } + } + }, + "database": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "client", + "databaseName" + ], + "properties": { + "id": { + "type": "string" + }, + "client": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "databaseOptions": { + "$ref": "#/definitions/collectionOrDatabaseOptions" + } + } + }, + "collection": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "database", + "collectionName" + ], + "properties": { + "id": { + "type": "string" + }, + "database": { + "type": "string" + }, + "collectionName": { + "type": "string" + }, + "collectionOptions": { + "$ref": "#/definitions/collectionOrDatabaseOptions" + } + } + }, + "session": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "client" + ], + "properties": { + "id": { + "type": "string" + }, + "client": { + "type": "string" + }, + "sessionOptions": { + "type": "object" + } + } + }, + "bucket": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "database" + ], + "properties": { + "id": { + "type": "string" + }, + "database": { + "type": "string" + }, + "bucketOptions": { + "type": "object" + } + } + }, + "thread": { + "type": "object", + "additionalProperties": false, + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + } + } + } + } + }, + "logComponent": { + "type": "string", + "enum": [ + "command", + "topology", + "serverSelection", + "connection" + ] + }, + "logSeverityLevel": { + "type": "string", + "enum": [ + "emergency", + "alert", + "critical", + "error", + "warning", + "notice", + "info", + "debug", + "trace" + ] + }, + "clientEncryptionOpts": { + "type": "object", + "additionalProperties": false, + "required": [ + "keyVaultClient", + "keyVaultNamespace", + "kmsProviders" + ], + "properties": { + "keyVaultClient": { + "type": "string" + }, + "keyVaultNamespace": { + "type": "string" + }, + "kmsProviders": { + "$ref": "#/definitions/kmsProviders" + }, + "keyExpirationMS": { + "type": "integer" + } + } + }, + "kmsProviders": { + "$defs": { + "stringOrPlaceholder": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": false, + "required": [ + "$$placeholder" + ], + "properties": { + "$$placeholder": {} + } + } + ] + } + }, + "type": "object", + "additionalProperties": false, + "patternProperties": { + "^aws(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "accessKeyId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "secretAccessKey": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "sessionToken": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^azure(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "tenantId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "clientId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "clientSecret": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "identityPlatformEndpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^gcp(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "email": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "privateKey": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "endpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^kmip(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "endpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^local(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "key": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + } + } + }, + "storeEventsAsEntity": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "events" + ], + "properties": { + "id": { + "type": "string" + }, + "events": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "PoolCreatedEvent", + "PoolReadyEvent", + "PoolClearedEvent", + "PoolClosedEvent", + "ConnectionCreatedEvent", + "ConnectionReadyEvent", + "ConnectionClosedEvent", + "ConnectionCheckOutStartedEvent", + "ConnectionCheckOutFailedEvent", + "ConnectionCheckedOutEvent", + "ConnectionCheckedInEvent", + "CommandStartedEvent", + "CommandSucceededEvent", + "CommandFailedEvent", + "ServerDescriptionChangedEvent", + "TopologyDescriptionChangedEvent" + ] + } + } + } + }, + "collectionData": { + "type": "object", + "additionalProperties": false, + "required": [ + "collectionName", + "databaseName", + "documents" + ], + "properties": { + "collectionName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "createOptions": { + "type": "object", + "properties": { + "writeConcern": false + } + }, + "documents": { + "type": "array", + "items": { + "type": "object" + } + } + } + }, + "expectedEventsForClient": { + "type": "object", + "additionalProperties": false, + "required": [ + "client", + "events" + ], + "properties": { + "client": { + "type": "string" + }, + "eventType": { + "type": "string", + "enum": [ + "command", + "cmap", + "sdam" + ] + }, + "events": { + "type": "array" + }, + "ignoreExtraEvents": { + "type": "boolean" + } + }, + "oneOf": [ + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "command" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCommandEvent" + } + } + } + }, + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "cmap" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCmapEvent" + } + } + } + }, + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "sdam" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedSdamEvent" + } + } + } + }, + { + "additionalProperties": false, + "properties": { + "client": { + "type": "string" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCommandEvent" + } + }, + "ignoreExtraEvents": { + "type": "boolean" + } + } + } + ] + }, + "expectedCommandEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "commandStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "command": { + "type": "object" + }, + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + }, + "commandSucceededEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reply": { + "type": "object" + }, + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + }, + "commandFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + } + } + }, + "expectedCmapEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "poolCreatedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "poolReadyEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "poolClearedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "hasServiceId": { + "type": "boolean" + }, + "interruptInUseConnections": { + "type": "boolean" + } + } + }, + "poolClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCreatedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionReadyEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } + }, + "connectionCheckOutStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCheckOutFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } + }, + "connectionCheckedOutEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCheckedInEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + } + } + }, + "expectedSdamEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "serverDescriptionChangedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "previousDescription": { + "$ref": "#/definitions/serverDescription" + }, + "newDescription": { + "$ref": "#/definitions/serverDescription" + } + } + }, + "topologyDescriptionChangedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "previousDescription": { + "$ref": "#/definitions/topologyDescription" + }, + "newDescription": { + "$ref": "#/definitions/topologyDescription" + } + } + }, + "serverHeartbeatStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "serverHeartbeatSucceededEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "serverHeartbeatFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "topologyOpeningEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "topologyClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + } + } + }, + "serverDescription": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "enum": [ + "Standalone", + "Mongos", + "PossiblePrimary", + "RSPrimary", + "RSSecondary", + "RSOther", + "RSArbiter", + "RSGhost", + "LoadBalancer", + "Unknown" + ] + } + } + }, + "topologyDescription": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "enum": [ + "Single", + "Unknown", + "ReplicaSetNoPrimary", + "ReplicaSetWithPrimary", + "Sharded", + "LoadBalanced" + ] + } + } + }, + "expectedLogMessagesForClient": { + "type": "object", + "additionalProperties": false, + "required": [ + "client", + "messages" + ], + "properties": { + "client": { + "type": "string" + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedLogMessage" + } + }, + "ignoreExtraMessages": { + "type": "boolean" + }, + "ignoreMessages": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedLogMessage" + } + } + } + }, + "expectedLogMessage": { + "type": "object", + "additionalProperties": false, + "required": [ + "level", + "component", + "data" + ], + "properties": { + "level": { + "$ref": "#/definitions/logSeverityLevel" + }, + "component": { + "$ref": "#/definitions/logComponent" + }, + "data": { + "type": "object" + }, + "failureIsRedacted": { + "type": "boolean" + } + } + }, + "collectionOrDatabaseOptions": { + "type": "object", + "additionalProperties": false, + "properties": { + "readConcern": { + "type": "object" + }, + "readPreference": { + "type": "object" + }, + "writeConcern": { + "type": "object" + }, + "timeoutMS": { + "type": "integer" + } + } + }, + "serverApi": { + "type": "object", + "additionalProperties": false, + "required": [ + "version" + ], + "properties": { + "version": { + "type": "string" + }, + "strict": { + "type": "boolean" + }, + "deprecationErrors": { + "type": "boolean" + } + } + }, + "operation": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "object" + ], + "properties": { + "name": { + "type": "string" + }, + "object": { + "type": "string" + }, + "arguments": { + "type": "object" + }, + "ignoreResultAndError": { + "type": "boolean" + }, + "expectError": { + "$ref": "#/definitions/expectedError" + }, + "expectResult": {}, + "saveResultAsEntity": { + "type": "string" + } + }, + "allOf": [ + { + "not": { + "required": [ + "expectError", + "expectResult" + ] + } + }, + { + "not": { + "required": [ + "expectError", + "saveResultAsEntity" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "expectResult" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "expectError" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "saveResultAsEntity" + ] + } + } + ] + }, + "expectedError": { + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "properties": { + "isError": { + "type": "boolean", + "const": true + }, + "isClientError": { + "type": "boolean" + }, + "isTimeoutError": { + "type": "boolean" + }, + "errorContains": { + "type": "string" + }, + "errorCode": { + "type": "integer" + }, + "errorCodeName": { + "type": "string" + }, + "errorLabelsContain": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "errorLabelsOmit": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "writeErrors": { + "type": "object" + }, + "writeConcernErrors": { + "type": "array", + "items": { + "type": "object" + } + }, + "errorResponse": { + "type": "object" + }, + "expectResult": {} + } + }, + "test": { + "type": "object", + "additionalProperties": false, + "required": [ + "description", + "operations" + ], + "properties": { + "description": { + "type": "string" + }, + "runOnRequirements": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/runOnRequirement" + } + }, + "skipReason": { + "type": "string" + }, + "operations": { + "type": "array", + "items": { + "$ref": "#/definitions/operation" + } + }, + "expectEvents": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/expectedEventsForClient" + } + }, + "expectLogMessages": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/expectedLogMessagesForClient" + } + }, + "outcome": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/collectionData" + } + } + } + } + } +} diff --git a/source/unified-test-format/tests/Makefile b/source/unified-test-format/tests/Makefile index 1a049e72ce..eea2977432 100644 --- a/source/unified-test-format/tests/Makefile +++ b/source/unified-test-format/tests/Makefile @@ -1,4 +1,4 @@ -SCHEMA=../schema-1.23.json +SCHEMA=../schema-1.24.json .PHONY: all invalid valid-fail valid-pass atlas-data-lake versioned-api load-balancers gridfs transactions transactions-convenient-api crud collection-management read-write-concern retryable-reads retryable-writes sessions command-logging-and-monitoring client-side-operations-timeout HAS_AJV diff --git a/source/unified-test-format/unified-test-format.md b/source/unified-test-format/unified-test-format.md index a2c0522eb2..63d8b42a6d 100644 --- a/source/unified-test-format/unified-test-format.md +++ b/source/unified-test-format/unified-test-format.md @@ -2,7 +2,7 @@ - Status: Accepted - Minimum Server Version: N/A -- Current Schema Version: 1.19.0 +- Current Schema Version: 1.24.0 ______________________________________________________________________ @@ -480,6 +480,9 @@ The structure of this object is as follows: - [connectionCheckOutFailedEvent](#expectedEvent_connectionCheckOutFailedEvent) - [connectionCheckedOutEvent](#expectedEvent_connectionCheckedOutEvent) - [connectionCheckedInEvent](#expectedEvent_connectionCheckedInEvent) + - [connectionPendingResponseStarted](#expectedEvent_connectionPendingResponseStarted) + - [connectionPendingResponseSucceeded](#expectedEvent_connectionPendingResponseSucceeded) + - connectionPendingResponseFailed](#expectedEvent_connectionPendingResponseFailed) - [serverDescriptionChangedEvent](#expectedEvent_serverDescriptionChangedEvent) - [serverHeartbeatStartedEvent](#expectedEvent_serverHeartbeatStartedEvent) - [serverHeartbeatSucceededEvent](#expectedEvent_serverHeartbeatSucceededEvent) @@ -1149,6 +1152,27 @@ The structure of this object is as follows: - `connectionCheckedInEvent`: Optional object. If present, this object MUST be an empty document as all fields in this event are non-deterministic. + + +- `connectionPendingResponseStarted`: Optional object. If present, this object MUST be an empty document as all fields in this + event are non-deterministic. + + + +- `connectionPendingResponseSucceeded`: Optional object. If present, this object MUST be an empty document as all fields in this + event are non-deterministic. + + + +- `connectionPendingResponseFailed`: Optional object. Assertions for one or more + [connectionPendingResponseFailed](../connection-monitoring-and-pooling/connection-monitoring-and-pooling.md#events) + fields. + + The structure of this object is as follows: + + - `reason`: Optional string. Test runners MUST assert that the reason in the published event matches this value. Valid + values for this field are defined in the CMAP spec. + ##### expectedSdamEvent The structure of this object is as follows: @@ -3555,6 +3579,8 @@ other specs *and* collating spec changes developed in parallel or during the sam ## Changelog +- 2025-04-25: **Schema version 1.24** + - 2025-01-21: **Schema version 1.23.** Support automatic encryption. Add `autoEncryptOpts` to `client` entity. From e2653cb300dab659873cc9e7ef1739deea009a9b Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 15:15:19 -0600 Subject: [PATCH 21/31] DRIVERS-2884 Update schema w/ new connection events --- .../tests/pending-response.yml | 7 ------- source/unified-test-format/schema-1.24.json | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index cec6e5eed8..edf9dddc00 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -111,7 +111,6 @@ tests: # Run a ping command to pre-load the pool with a connection. - name: runCommand object: *database - command_name: ping arguments: command: ping: 1 @@ -177,7 +176,6 @@ tests: # Run a ping command to pre-load the pool with a connection. - name: runCommand object: *database - command_name: ping arguments: command: ping: 1 @@ -231,8 +229,3 @@ tests: - connectionPendingResponseSucceeded: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # find succeeds - - # Attempting pending response read on a realistic timeout operation that gets - # no response from the server after the pending response start time has been - # exceeded should close. - diff --git a/source/unified-test-format/schema-1.24.json b/source/unified-test-format/schema-1.24.json index a2d92c5d03..b19fd02548 100644 --- a/source/unified-test-format/schema-1.24.json +++ b/source/unified-test-format/schema-1.24.json @@ -787,6 +787,25 @@ "type": "object", "additionalProperties": false, "properties": {} + }, + "connectionPendingResponseStarted": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseSucceeded": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseFailed": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } } } }, From 00aa6201e9d2fcfcaf7c01d804c3f95818b75bb4 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 15:20:55 -0600 Subject: [PATCH 22/31] DRIVERS-2884 Remove additional properties --- .../tests/logging/connection-logging.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml index 74a233b3a7..d0d82ed3c8 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml @@ -251,7 +251,6 @@ tests: # Run a ping command to pre-load the pool with a connection. - name: runCommand object: *database - command_name: ping arguments: command: ping: 1 From b29d6ccc028d787a71d049352ff4003446f2ffb3 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 15:28:43 -0600 Subject: [PATCH 23/31] DRIVERS-2884 Remove ignoring extra events --- .../tests/logging/connection-logging.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml index d0d82ed3c8..7e5102bb95 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml @@ -1,6 +1,6 @@ description: "connection-logging" -schemaVersion: "1.13" +schemaVersion: "1.24" runOnRequirements: - topologies: From b04b340dd5f85d588e9301d4111b13064f315457 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 15:50:16 -0600 Subject: [PATCH 24/31] DRIVERS-2884 Clean up tests --- .../tests/pending-response.json | 2 - .../tests/pending-response.yml | 114 ++++++++---------- .../tests/logging/connection-logging.json | 3 +- 3 files changed, 48 insertions(+), 71 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 266461d08b..7e2ca2a3d9 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -174,7 +174,6 @@ { "name": "runCommand", "object": "test", - "command_name": "ping", "arguments": { "command": { "ping": 1 @@ -283,7 +282,6 @@ { "name": "runCommand", "object": "test", - "command_name": "ping", "arguments": { "command": { "ping": 1 diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index edf9dddc00..76b233dff3 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -1,15 +1,12 @@ description: "Connection churn is prevented by reading pending responses during connection checkout" - schemaVersion: "1.24" - runOnRequirements: - minServerVersion: "4.4" - # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired - # after maxTimeMS, whereas mongod returns it after - # max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests - # will not pass on sharded clusters. + # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired + # after maxTimeMS, whereas mongod returns it after + # max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests + # will not pass on sharded clusters. topologies: ["single", "replicaset"] - createEntities: - client: id: &failPointClient failPointClient @@ -36,58 +33,52 @@ createEntities: id: &collection coll database: *database collectionName: *collection - initialData: - collectionName: *collection databaseName: *database documents: [] - tests: - # Attempting pending response read on a non-timeout operation that can - # immediately read from the TCP buffer should complete the pending read and - # the connection should be checked out. + # Attempting a pending response read on a non-timeout operation that can + # immediately read from the TCP buffer should complete the pending read and + # the connection should be checked out. - description: "non-timeout write op with response and no error" operations: - # Run a ping command to pre-load the pool with a connection. + # Run a ping command to pre-load the pool with a connection. - name: runCommand object: *database arguments: command: ping: 1 - - # Create a failpoint to block the first operation + # Create a failpoint to block the first operation. - name: failPoint object: testRunner arguments: client: *failPointClient failPoint: configureFailPoint: failCommand - mode: { times: 1 } + mode: {times: 1} data: failCommands: ["insert"] blockConnection: true blockTimeMS: 100 - - # Execute operation with timeout less than block time + # Execute operation with timeout less than block time. - name: insertOne object: *collection arguments: timeoutMS: 75 - document: { _id: 3, x: 1 } + document: {_id: 3, x: 1} expectError: isTimeoutError: true - - # Execute a subsequent operation to complete the read + # Execute a subsequent operation to complete the read. - name: findOne object: *collection arguments: - filter: { _id: 1 } - + filter: {_id: 1} expectEvents: - client: *client events: - commandSucceededEvent: - commandName: ping # pre-loading the connection pool. + commandName: ping # Pre-loading the connection pool. - commandFailedEvent: commandName: insert - commandSucceededEvent: @@ -96,124 +87,113 @@ tests: eventType: cmap events: - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # ping finishes + - connectionCheckedInEvent: {} # Ping finishes. - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # insert fails + - connectionCheckedInEvent: {} # Insert fails. - connectionPendingResponseStarted: {} - - connectionPendingResponseSucceeded: {} # find op drains conn + - connectionPendingResponseSucceeded: {} # Find operation drains connection. - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # find succeeds - - # Attempting pending response read on a non-timeout operation that gets no - # response from the server after 3s should close the connection. + - connectionCheckedInEvent: {} # Find succeeds. + # Attempting a pending response read on a non-timeout operation that gets no + # response from the server after 3s should close the connection. - description: "non-timeout op with no response" operations: - # Run a ping command to pre-load the pool with a connection. + # Run a ping command to pre-load the pool with a connection. - name: runCommand object: *database arguments: command: ping: 1 - - # Create a failpoint to block the first operation + # Create a failpoint to block the first operation. - name: failPoint object: testRunner arguments: client: *failPointClient failPoint: configureFailPoint: failCommand - mode: { times: 1 } + mode: {times: 1} data: failCommands: ["insert"] blockConnection: true blockTimeMS: 3100 - - # Execute operation with timeout less than block time + # Execute operation with timeout less than block time. - name: insertOne object: *collection arguments: timeoutMS: 50 - document: { _id: 3, x: 1 } + document: {_id: 3, x: 1} expectError: isTimeoutError: true - - # Execute a subsequent operation to complete the read + # Execute a subsequent operation to complete the read. - name: findOne object: *collection arguments: - filter: { _id: 1 } + filter: {_id: 1} expectError: isTimeoutError: true - expectEvents: - client: *client events: - commandSucceededEvent: - commandName: ping # pre-loading the connection pool. + commandName: ping # Pre-loading the connection pool. - commandFailedEvent: commandName: insert - # No second failed event since we timed out attempting to check out - # the connection for the second operation + # No second failed event since we timed out attempting to check out + # the connection for the second operation. - client: *client eventType: cmap events: - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # ping finishes + - connectionCheckedInEvent: {} # Ping finishes. - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # insert fails + - connectionCheckedInEvent: {} # Insert fails. - connectionPendingResponseStarted: {} - connectionPendingResponseFailed: reason: timeout - connectionClosedEvent: reason: error - - # Attempting pending response read on a realistic timeout operation that can - # immediately read from the TCP buffer should complete the pending read and - # the connection should be checked out. + # Attempting a pending response read on a realistic timeout operation that can + # immediately read from the TCP buffer should complete the pending read and + # the connection should be checked out. - description: "timeout write op with response and no error" operations: - # Run a ping command to pre-load the pool with a connection. + # Run a ping command to pre-load the pool with a connection. - name: runCommand object: *database arguments: command: ping: 1 - - # Create a failpoint to block the first operation + # Create a failpoint to block the first operation. - name: failPoint object: testRunner arguments: client: *failPointClient failPoint: configureFailPoint: failCommand - mode: { times: 1 } + mode: {times: 1} data: failCommands: ["insert"] blockConnection: true blockTimeMS: 250 - - # Execute operation with timeout less than block time + # Execute operation with timeout less than block time. - name: insertOne object: *collection arguments: timeoutMS: 75 - document: { _id: 3, x: 1 } + document: {_id: 3, x: 1} expectError: isTimeoutError: true - - # Execute a subsequent operation to complete the read + # Execute a subsequent operation to complete the read. - name: findOne object: *collection arguments: timeoutMS: 200 - filter: { _id: 1 } - + filter: {_id: 1} expectEvents: - client: *client events: - commandSucceededEvent: - commandName: ping # pre-loading the connection pool. + commandName: ping # Pre-loading the connection pool. - commandFailedEvent: commandName: insert - commandSucceededEvent: @@ -222,10 +202,10 @@ tests: eventType: cmap events: - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # ping finishes + - connectionCheckedInEvent: {} # Ping finishes. - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # insert fails + - connectionCheckedInEvent: {} # Insert fails. - connectionPendingResponseStarted: {} - connectionPendingResponseSucceeded: {} - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # find succeeds + - connectionCheckedInEvent: {} # Find succeeds. diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.json b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.json index f2a0734744..d545e8034c 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.json +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.json @@ -1,6 +1,6 @@ { "description": "connection-logging", - "schemaVersion": "1.13", + "schemaVersion": "1.24", "runOnRequirements": [ { "topologies": [ @@ -560,7 +560,6 @@ { "name": "runCommand", "object": "test", - "command_name": "ping", "arguments": { "command": { "ping": 1 From 40b302cf5a9e725fc8b6c9f08321cfd852157f54 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 15:53:59 -0600 Subject: [PATCH 25/31] DRIVERS-2884 Uncapitalize D in ID --- .../connection-monitoring-and-pooling.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index ef8919c08e..ca28be8578 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1378,14 +1378,14 @@ In addition to the common fields defined above, this message MUST contain the fo | Key | Suggested Type | Value | | ------------------ | -------------- | ------------------------------------------------------------------- | | message | string | "Pending response started" | -| driverConnectionID | int64 | The driver-generated ID for the connection | -| requestID | int64 | The driver-generated request ID associated with the network timeout | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID associated with the network timeout | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: > Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request -> ID={{requestID}} +> Id={{requestId}} #### Connection Pending Response Succeeded @@ -1394,15 +1394,15 @@ In addition to the common fields defined above, this message MUST contain the fo | Key | Suggested Type | Value | | ------------------ | ------------------ | ------------------------------------------------------------------- | | message | string | "Pending response succeeded" | -| driverConnectionID | int64 | The driver-generated ID for the connection | -| requestID | int64 | The driver-generated request ID associated with the network timeout | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID associated with the network timeout | | durationMS | Int32/Int64/Double | The time it took to complete the pending read | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: > Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, -> DurationMS={{durationMS}} ms +> requestId={{requestId}}, duration={{durationMS}} ms #### Connection Pending Response Failed @@ -1411,7 +1411,7 @@ In addition to the common fields defined above, this message MUST contain the fo | Key | Suggested Type | Value | | ------------------ | -------------- | --------------------------------------------------- | | message | string | "Pending response failed" | -| driverConnectionID | int64 | The driver-generated ID for the connection | +| driverConnectionId | int64 | The driver-generated ID for the connection | | reason | string | The reason for why the pending response read failed | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in From 4348b44174cc85523fa30855400e9f00ddcd30d1 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 16:01:09 -0600 Subject: [PATCH 26/31] DRIVERS-2884 Another ID cleanup --- .../connection-monitoring-and-pooling.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index ca28be8578..87cdc51845 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1120,7 +1120,7 @@ interface PendingResponseStarted { /** * The driver-generated request ID associated with the network timeout. */ - requestID: int64; + requestId: int64; } /** @@ -1146,7 +1146,7 @@ interface PendingResponseSucceeded { /** * The driver-generated request ID associated with the network timeout. */ - requestID: int64; + requestId: int64; } /** @@ -1167,7 +1167,7 @@ interface PendingResponseFailed { /** * The driver-generated request ID associated with the network timeout. */ - requestID: int64; + requestId: int64; /** * The reason for why the pending read failed. @@ -1384,8 +1384,8 @@ In addition to the common fields defined above, this message MUST contain the fo The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: -> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request -> Id={{requestId}} +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, +> request ID={{requestId}} #### Connection Pending Response Succeeded @@ -1402,7 +1402,7 @@ The unstructured form SHOULD be as follows, using the values defined in the stru placeholders as appropriate: > Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, -> requestId={{requestId}}, duration={{durationMS}} ms +> request ID={{requestId}}, duration={{durationMS}} ms #### Connection Pending Response Failed @@ -1412,13 +1412,14 @@ In addition to the common fields defined above, this message MUST contain the fo | ------------------ | -------------- | --------------------------------------------------- | | message | string | "Pending response failed" | | driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID associated with the network timeout | | reason | string | The reason for why the pending response read failed | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: > Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, -> reason={{reason}} +> request ID={{requestId}}, reason={{reason}} ### Connection Pool Errors From dd0dbe928b8d1d8c6f40073105c892a1c6efb2d6 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 16:13:53 -0600 Subject: [PATCH 27/31] DRIVERS-2884 Remove the word write --- .../client-side-operations-timeout/tests/pending-response.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index 76b233dff3..931436d5a0 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -41,7 +41,7 @@ tests: # Attempting a pending response read on a non-timeout operation that can # immediately read from the TCP buffer should complete the pending read and # the connection should be checked out. - - description: "non-timeout write op with response and no error" + - description: "non-timeout op with response and no error" operations: # Run a ping command to pre-load the pool with a connection. - name: runCommand @@ -155,7 +155,7 @@ tests: # Attempting a pending response read on a realistic timeout operation that can # immediately read from the TCP buffer should complete the pending read and # the connection should be checked out. - - description: "timeout write op with response and no error" + - description: "timeout op with response and no error" operations: # Run a ping command to pre-load the pool with a connection. - name: runCommand From e43c4666760314458c93d8b2e930844e87e464ea Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 25 Apr 2025 16:15:59 -0600 Subject: [PATCH 28/31] DRIVERS-2884 Remove the word write --- .../tests/pending-response.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 7e2ca2a3d9..05c1e8b53c 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -60,7 +60,7 @@ ], "tests": [ { - "description": "non-timeout write op with response and no error", + "description": "non-timeout op with response and no error", "operations": [ { "name": "runCommand", @@ -277,7 +277,7 @@ ] }, { - "description": "timeout write op with response and no error", + "description": "timeout op with response and no error", "operations": [ { "name": "runCommand", From 89754cecf1d6354ba2a1953f359f4f2fed59ea63 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Mon, 28 Apr 2025 10:21:41 -0600 Subject: [PATCH 29/31] Add punctuation --- .../connection-monitoring-and-pooling.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 87cdc51845..8610fcdfa8 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1578,7 +1578,7 @@ closure, but will still close in pathological conditions (e.g., a dead server or Because of the ["no knobs" mantra](https://github.com/mongodb/specifications/blob/master/source/driver-mantras.md#no-knobs). We can -always reconsider this in the future +always reconsider this in the future. ### Why does the pending response read timeout include the time the connection is idle in the pool? From a3c00a41f60b9e3da9ff24b9fda105e59557445c Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 30 Apr 2025 17:00:26 -0600 Subject: [PATCH 30/31] DRIVERS-2884 Update schema latest --- source/unified-test-format/schema-latest.json | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/source/unified-test-format/schema-latest.json b/source/unified-test-format/schema-latest.json index b454199b05..b19fd02548 100644 --- a/source/unified-test-format/schema-latest.json +++ b/source/unified-test-format/schema-latest.json @@ -143,6 +143,9 @@ "connectionCheckOutFailedEvent", "connectionCheckedOutEvent", "connectionCheckedInEvent", + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed", "serverDescriptionChangedEvent", "topologyDescriptionChangedEvent" ] @@ -784,6 +787,25 @@ "type": "object", "additionalProperties": false, "properties": {} + }, + "connectionPendingResponseStarted": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseSucceeded": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseFailed": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } } } }, @@ -1156,4 +1178,4 @@ } } } -} \ No newline at end of file +} From 6a663eb5b48734047d81b838aa301f454233b1da Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Wed, 30 Apr 2025 17:02:18 -0600 Subject: [PATCH 31/31] DRIVERS-2884 Clarify schema bump --- source/unified-test-format/unified-test-format.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/unified-test-format/unified-test-format.md b/source/unified-test-format/unified-test-format.md index 63d8b42a6d..3cc15b351d 100644 --- a/source/unified-test-format/unified-test-format.md +++ b/source/unified-test-format/unified-test-format.md @@ -3580,6 +3580,9 @@ other specs *and* collating spec changes developed in parallel or during the sam ## Changelog - 2025-04-25: **Schema version 1.24** + + Add pending response events to `expectEvents` and `observeEvents` for client entities. These include + `PendingResponseStartedEvent`, `PendingResponseSucceededEvent`, and `PendingResponseFailedEvent`. - 2025-01-21: **Schema version 1.23.**