From e716768a62d73c19fe841a0b89adc8fe6464a0fe Mon Sep 17 00:00:00 2001 From: Lawrence Forooghian Date: Fri, 3 Nov 2023 15:24:49 -0300 Subject: [PATCH] Treat all transport errors as recoverable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RTN14d says that a transport error should be considered recoverable if it is > a network failure, a timeout such as RTN14c, or a disconnected > response, other than a token failure RTN14b) However, it does not define what it means by a "network failure", leading to each platform having to provide its own interpretation. In particular, a client recently told us that they’ve been seeing lots of OSStatus 9806 (errSSLClosedAbort) errors. This appears to indicate some sort of failure to perform an SSL handshake. We don’t understand the cause of this issue but we noticed that it was causing the client to transition to the FAILED state. Speaking to Paddy, he said that this error should be provoking a connection retry, not failure. And more broadly, he indicated that, basically, _all_ transport errors should be considered recoverable. So, that’s what we do here. He’s raised a specification issue [1] for us to properly specify this and to decide if there are any nuances to consider, but is keen for us to implement this broad behaviour in ably-cocoa ASAP to help the customer experiencing the errSSLClosedAbort errors. Resolves #1817. [1] https://github.com/ably/specification/issues/171 --- Source/ARTRealtime.m | 30 +++++--------- Test/Test Utilities/TestUtilities.swift | 8 ++-- .../Tests/RealtimeClientConnectionTests.swift | 41 +++++++++++-------- 3 files changed, 38 insertions(+), 41 deletions(-) diff --git a/Source/ARTRealtime.m b/Source/ARTRealtime.m index ac1b0af9e..d207b27c8 100644 --- a/Source/ARTRealtime.m +++ b/Source/ARTRealtime.m @@ -1629,21 +1629,10 @@ - (void)realtimeTransportFailed:(id)transport withError:(A return; } } - - switch (transportError.type) { - case ARTRealtimeTransportErrorTypeBadResponse: - case ARTRealtimeTransportErrorTypeOther: { - ARTErrorInfo *const errorInfo = [ARTErrorInfo createFromNSError:transportError.error]; - ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo]; - [self transition:ARTRealtimeFailed withMetadata:metadata]; - break; - } - default: { - ARTErrorInfo *error = [ARTErrorInfo createFromNSError:transportError.error]; - ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:error]; - [self transitionToDisconnectedOrSuspendedWithMetadata:metadata]; - } - } + + ARTErrorInfo *const errorInfo = [ARTErrorInfo createFromNSError:transportError.error]; + ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo]; + [self transitionToDisconnectedOrSuspendedWithMetadata:metadata]; } - (void)realtimeTransportNeverConnected:(id)transport { @@ -1654,7 +1643,7 @@ - (void)realtimeTransportNeverConnected:(id)transport { ARTErrorInfo *const errorInfo = [ARTErrorInfo createWithCode:ARTClientCodeErrorTransport message:@"Transport never connected"]; ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo]; - [self transition:ARTRealtimeFailed withMetadata:metadata]; + [self transitionToDisconnectedOrSuspendedWithMetadata:metadata]; } - (void)realtimeTransportRefused:(id)transport withError:(ARTRealtimeTransportError *)error { @@ -1666,15 +1655,16 @@ - (void)realtimeTransportRefused:(id)transport withError:( if (error && error.type == ARTRealtimeTransportErrorTypeRefused) { ARTErrorInfo *const errorInfo = [ARTErrorInfo createWithCode:ARTClientCodeErrorTransport message:[NSString stringWithFormat:@"Connection refused using %@", error.url]]; ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo]; - [self transition:ARTRealtimeFailed withMetadata:metadata]; + [self transitionToDisconnectedOrSuspendedWithMetadata:metadata]; } else if (error) { ARTErrorInfo *const errorInfo = [ARTErrorInfo createFromNSError:error.error]; ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo]; - [self transition:ARTRealtimeFailed withMetadata:metadata]; + [self transitionToDisconnectedOrSuspendedWithMetadata:metadata]; } else { - [self transition:ARTRealtimeFailed withMetadata:[[ARTConnectionStateChangeMetadata alloc] init]]; + ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] init]; + [self transitionToDisconnectedOrSuspendedWithMetadata:metadata]; } } @@ -1686,7 +1676,7 @@ - (void)realtimeTransportTooBig:(id)transport { ARTErrorInfo *const errorInfo = [ARTErrorInfo createWithCode:ARTClientCodeErrorTransport message:@"Transport too big"]; ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo]; - [self transition:ARTRealtimeFailed withMetadata:metadata]; + [self transitionToDisconnectedOrSuspendedWithMetadata:metadata]; } - (void)realtimeTransportSetMsgSerial:(id)transport msgSerial:(int64_t)msgSerial { diff --git a/Test/Test Utilities/TestUtilities.swift b/Test/Test Utilities/TestUtilities.swift index 46721160e..ca55f278c 100644 --- a/Test/Test Utilities/TestUtilities.swift +++ b/Test/Test Utilities/TestUtilities.swift @@ -1658,9 +1658,11 @@ extension ARTWebSocketTransport { } func simulateIncomingError() { - let error = NSError(domain: ARTAblyErrorDomain, code: 0, userInfo: [NSLocalizedDescriptionKey:"Fail test"]) - let webSocketDelegate = self as ARTWebSocketDelegate - webSocketDelegate.webSocket?(self.websocket!, didFailWithError: error) + // Simulate receiving an ERROR ProtocolMessage, which should put a client into the FAILED state (per RTN15i) + let protocolMessage = ARTProtocolMessage() + protocolMessage.action = .error + protocolMessage.error = ARTErrorInfo.create(withCode: 50000 /* arbitrarily chosen */, message: "Fail test") + receive(protocolMessage) } } diff --git a/Test/Tests/RealtimeClientConnectionTests.swift b/Test/Tests/RealtimeClientConnectionTests.swift index ba2755392..a33af027a 100644 --- a/Test/Tests/RealtimeClientConnectionTests.swift +++ b/Test/Tests/RealtimeClientConnectionTests.swift @@ -3685,41 +3685,46 @@ class RealtimeClientConnectionTests: XCTestCase { try testMovesToDisconnectedWithNetworkingError(NSError(domain: kCFErrorDomainCFNetwork as String, code: 1337, userInfo: [NSLocalizedDescriptionKey: "shouldn't matter"]), for: test) } - func test__090__Connection__Host_Fallback__should_not_use_an_alternative_host_when_the_client_receives_a_bad_request() { - let test = Test() + func test__090__Connection__Host_Fallback__should_not_use_an_alternative_host_when_the_client_receives_a_bad_request() throws { let options = ARTClientOptions(key: "xxxx:xxxx") options.autoConnect = false + options.disconnectedRetryTimeout = 1.0 // so that the test doesn't have to wait a long time to observe a retry options.testOptions.realtimeRequestTimeout = 1.0 let transportFactory = TestProxyTransportFactory() options.testOptions.transportFactory = transportFactory let client = ARTRealtime(options: options) - let channel = client.channels.get(test.uniqueChannelName()) transportFactory.fakeNetworkResponse = .host400BadRequest - var urlConnections = [URL]() - transportFactory.networkConnectEvent = { transport, url in - if client.internal.transport !== transport { - return + let dataGatherer = DataGatherer(description: "Observe emitted state changes and transport connection attempts") { submit in + var stateChanges: [ARTConnectionStateChange] = [] + var urlConnections = [URL]() + + client.connection.on { stateChange in + stateChanges.append(stateChange) + if (stateChanges.count == 3) { + submit((stateChanges: stateChanges, urlConnections: urlConnections)) + } + } + + transportFactory.networkConnectEvent = { transport, url in + if client.internal.transport !== transport { + return + } + urlConnections.append(url) } - urlConnections.append(url) } client.connect() defer { client.dispose(); client.close() } - // We expect the first connection attempt to fail due to the .fakeNetworkResponse configured above. This error does not meet the criteria for trying a fallback host, and so should not provoke the use of a fallback host. Hence the connection should give up and transition to the FAILED state (which causes the publish to fail). We should see that there was only one connection attempt, to the primary host. + let data = try dataGatherer.waitForData(timeout: testTimeout) - waitUntil(timeout: testTimeout) { done in - channel.publish(nil, data: "message") { error in - XCTAssertNotNil(error) - done() - } - } + // We expect the first connection attempt to fail due to the .fakeNetworkResponse configured above. This error does not meet the criteria for trying a fallback host, and so should not provoke the use of a fallback host. Hence the connection should transition to DISCONNECTED, and then subsequently retry, transitioning back to CONNECTING. We should see that there were two connection attempts, both to the primary host. - XCTAssertEqual(client.connection.state, .failed) - XCTAssertEqual(urlConnections.count, 1) - XCTAssertTrue(NSRegularExpression.match(urlConnections[0].absoluteString, pattern: "//realtime.ably.io")) + XCTAssertEqual(data.stateChanges.map(\.current), [.connecting, .disconnected, .connecting]) + XCTAssertEqual(data.urlConnections.count, 2) + XCTAssertTrue(data.urlConnections.allSatisfy { url in NSRegularExpression.match(url.absoluteString, pattern: "//realtime.ably.io") }) } // RTN17a