Skip to content

Commit

Permalink
Treat all transport errors as recoverable
Browse files Browse the repository at this point in the history
RTN14d says that a transport error should be considered recoverable if
it is

> a network failure, a timeout such as RTN14c, or a disconnected
> response, other than a token failure RTN14b)

However, it does not define what it means by a "network failure",
leading to each platform having to provide its own interpretation.

In particular, a client recently told us that they’ve been seeing lots
of OSStatus 9806 (errSSLClosedAbort) errors. This appears to indicate
some sort of failure to perform an SSL handshake. We don’t understand
the cause of this issue but we noticed that it was causing the client to
transition to the FAILED state.

Speaking to Paddy, he said that this error should be provoking a
connection retry, not failure. And more broadly, he indicated that,
basically, _all_ transport errors should be considered recoverable. So,
that’s what we do here. He’s raised a specification issue [1] for us to
properly specify this and to decide if there are any nuances to
consider, but is keen for us to implement this broad behaviour in
ably-cocoa ASAP to help the customer experiencing the errSSLClosedAbort
errors.

Resolves #1817.

[1] ably/specification#171
  • Loading branch information
lawrence-forooghian committed Nov 6, 2023
1 parent 373211c commit e716768
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 41 deletions.
30 changes: 10 additions & 20 deletions Source/ARTRealtime.m
Original file line number Diff line number Diff line change
Expand Up @@ -1629,21 +1629,10 @@ - (void)realtimeTransportFailed:(id<ARTRealtimeTransport>)transport withError:(A
return;
}
}

switch (transportError.type) {
case ARTRealtimeTransportErrorTypeBadResponse:
case ARTRealtimeTransportErrorTypeOther: {
ARTErrorInfo *const errorInfo = [ARTErrorInfo createFromNSError:transportError.error];
ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo];
[self transition:ARTRealtimeFailed withMetadata:metadata];
break;
}
default: {
ARTErrorInfo *error = [ARTErrorInfo createFromNSError:transportError.error];
ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:error];
[self transitionToDisconnectedOrSuspendedWithMetadata:metadata];
}
}

ARTErrorInfo *const errorInfo = [ARTErrorInfo createFromNSError:transportError.error];
ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo];
[self transitionToDisconnectedOrSuspendedWithMetadata:metadata];
}

- (void)realtimeTransportNeverConnected:(id<ARTRealtimeTransport>)transport {
Expand All @@ -1654,7 +1643,7 @@ - (void)realtimeTransportNeverConnected:(id<ARTRealtimeTransport>)transport {

ARTErrorInfo *const errorInfo = [ARTErrorInfo createWithCode:ARTClientCodeErrorTransport message:@"Transport never connected"];
ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo];
[self transition:ARTRealtimeFailed withMetadata:metadata];
[self transitionToDisconnectedOrSuspendedWithMetadata:metadata];
}

- (void)realtimeTransportRefused:(id<ARTRealtimeTransport>)transport withError:(ARTRealtimeTransportError *)error {
Expand All @@ -1666,15 +1655,16 @@ - (void)realtimeTransportRefused:(id<ARTRealtimeTransport>)transport withError:(
if (error && error.type == ARTRealtimeTransportErrorTypeRefused) {
ARTErrorInfo *const errorInfo = [ARTErrorInfo createWithCode:ARTClientCodeErrorTransport message:[NSString stringWithFormat:@"Connection refused using %@", error.url]];
ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo];
[self transition:ARTRealtimeFailed withMetadata:metadata];
[self transitionToDisconnectedOrSuspendedWithMetadata:metadata];
}
else if (error) {
ARTErrorInfo *const errorInfo = [ARTErrorInfo createFromNSError:error.error];
ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo];
[self transition:ARTRealtimeFailed withMetadata:metadata];
[self transitionToDisconnectedOrSuspendedWithMetadata:metadata];
}
else {
[self transition:ARTRealtimeFailed withMetadata:[[ARTConnectionStateChangeMetadata alloc] init]];
ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] init];
[self transitionToDisconnectedOrSuspendedWithMetadata:metadata];
}
}

Expand All @@ -1686,7 +1676,7 @@ - (void)realtimeTransportTooBig:(id<ARTRealtimeTransport>)transport {

ARTErrorInfo *const errorInfo = [ARTErrorInfo createWithCode:ARTClientCodeErrorTransport message:@"Transport too big"];
ARTConnectionStateChangeMetadata *const metadata = [[ARTConnectionStateChangeMetadata alloc] initWithErrorInfo:errorInfo];
[self transition:ARTRealtimeFailed withMetadata:metadata];
[self transitionToDisconnectedOrSuspendedWithMetadata:metadata];
}

- (void)realtimeTransportSetMsgSerial:(id<ARTRealtimeTransport>)transport msgSerial:(int64_t)msgSerial {
Expand Down
8 changes: 5 additions & 3 deletions Test/Test Utilities/TestUtilities.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1658,9 +1658,11 @@ extension ARTWebSocketTransport {
}

func simulateIncomingError() {
let error = NSError(domain: ARTAblyErrorDomain, code: 0, userInfo: [NSLocalizedDescriptionKey:"Fail test"])
let webSocketDelegate = self as ARTWebSocketDelegate
webSocketDelegate.webSocket?(self.websocket!, didFailWithError: error)
// Simulate receiving an ERROR ProtocolMessage, which should put a client into the FAILED state (per RTN15i)
let protocolMessage = ARTProtocolMessage()
protocolMessage.action = .error
protocolMessage.error = ARTErrorInfo.create(withCode: 50000 /* arbitrarily chosen */, message: "Fail test")
receive(protocolMessage)
}
}

Expand Down
41 changes: 23 additions & 18 deletions Test/Tests/RealtimeClientConnectionTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3685,41 +3685,46 @@ class RealtimeClientConnectionTests: XCTestCase {
try testMovesToDisconnectedWithNetworkingError(NSError(domain: kCFErrorDomainCFNetwork as String, code: 1337, userInfo: [NSLocalizedDescriptionKey: "shouldn't matter"]), for: test)
}

func test__090__Connection__Host_Fallback__should_not_use_an_alternative_host_when_the_client_receives_a_bad_request() {
let test = Test()
func test__090__Connection__Host_Fallback__should_not_use_an_alternative_host_when_the_client_receives_a_bad_request() throws {
let options = ARTClientOptions(key: "xxxx:xxxx")
options.autoConnect = false
options.disconnectedRetryTimeout = 1.0 // so that the test doesn't have to wait a long time to observe a retry
options.testOptions.realtimeRequestTimeout = 1.0
let transportFactory = TestProxyTransportFactory()
options.testOptions.transportFactory = transportFactory
let client = ARTRealtime(options: options)
let channel = client.channels.get(test.uniqueChannelName())

transportFactory.fakeNetworkResponse = .host400BadRequest

var urlConnections = [URL]()
transportFactory.networkConnectEvent = { transport, url in
if client.internal.transport !== transport {
return
let dataGatherer = DataGatherer(description: "Observe emitted state changes and transport connection attempts") { submit in
var stateChanges: [ARTConnectionStateChange] = []
var urlConnections = [URL]()

client.connection.on { stateChange in
stateChanges.append(stateChange)
if (stateChanges.count == 3) {
submit((stateChanges: stateChanges, urlConnections: urlConnections))
}
}

transportFactory.networkConnectEvent = { transport, url in
if client.internal.transport !== transport {
return
}
urlConnections.append(url)
}
urlConnections.append(url)
}

client.connect()
defer { client.dispose(); client.close() }

// We expect the first connection attempt to fail due to the .fakeNetworkResponse configured above. This error does not meet the criteria for trying a fallback host, and so should not provoke the use of a fallback host. Hence the connection should give up and transition to the FAILED state (which causes the publish to fail). We should see that there was only one connection attempt, to the primary host.
let data = try dataGatherer.waitForData(timeout: testTimeout)

waitUntil(timeout: testTimeout) { done in
channel.publish(nil, data: "message") { error in
XCTAssertNotNil(error)
done()
}
}
// We expect the first connection attempt to fail due to the .fakeNetworkResponse configured above. This error does not meet the criteria for trying a fallback host, and so should not provoke the use of a fallback host. Hence the connection should transition to DISCONNECTED, and then subsequently retry, transitioning back to CONNECTING. We should see that there were two connection attempts, both to the primary host.

XCTAssertEqual(client.connection.state, .failed)
XCTAssertEqual(urlConnections.count, 1)
XCTAssertTrue(NSRegularExpression.match(urlConnections[0].absoluteString, pattern: "//realtime.ably.io"))
XCTAssertEqual(data.stateChanges.map(\.current), [.connecting, .disconnected, .connecting])
XCTAssertEqual(data.urlConnections.count, 2)
XCTAssertTrue(data.urlConnections.allSatisfy { url in NSRegularExpression.match(url.absoluteString, pattern: "//realtime.ably.io") })
}

// RTN17a
Expand Down

0 comments on commit e716768

Please sign in to comment.