-
Notifications
You must be signed in to change notification settings - Fork 494
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Trigger Failover On 3092 And 1022 #4670
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ namespace Microsoft.Azure.Documents | |
using System.Net; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
using Microsoft.Azure.Cosmos.ChangeFeed.Exceptions; | ||
using Microsoft.Azure.Cosmos.Core.Trace; | ||
using Microsoft.Azure.Documents.Routing; | ||
|
||
|
@@ -143,6 +144,7 @@ public bool TryHandleResponseSynchronously(DocumentServiceRequest request, TResp | |
|
||
bool isRetryWith = false; | ||
if (!GoneAndRetryWithRequestRetryPolicy<TResponse>.IsBaseGone(response, exception) && | ||
!GoneAndRetryWithRequestRetryPolicy<TResponse>.IsGoneWithLeaseNotFound(response, exception) && | ||
!(exception is RetryWithException) && | ||
!(GoneAndRetryWithRequestRetryPolicy<TResponse>.IsPartitionIsMigrating(response, exception) && (request.ServiceIdentity == null || request.ServiceIdentity.IsMasterService)) && | ||
!(GoneAndRetryWithRequestRetryPolicy<TResponse>.IsInvalidPartition(response, exception) && (request.PartitionKeyRangeIdentity == null || request.PartitionKeyRangeIdentity.CollectionRid == null)) && | ||
|
@@ -170,6 +172,21 @@ public bool TryHandleResponseSynchronously(DocumentServiceRequest request, TResp | |
isRetryWith = true; | ||
this.lastRetryWithException = exception as RetryWithException; | ||
} | ||
else if (GoneAndRetryWithRequestRetryPolicy<TResponse>.IsGoneWithLeaseNotFound(response, exception)) | ||
{ | ||
DefaultTrace.TraceWarning( | ||
"The GoneAndRetryWithRequestRetryPolicy has hit 410 with lease not found. This is by design to do a cross regional failover to handle the exception: {0}", | ||
new ErrorOrResponse(exception)); | ||
|
||
exceptionToThrow = ServiceUnavailableException.Create( | ||
SubStatusCodes.LeaseNotFound, | ||
innerException: exception); | ||
|
||
this.durationTimer.Stop(); | ||
|
||
shouldRetryResult = ShouldRetryResult.NoRetry(exceptionToThrow); | ||
return true; | ||
} | ||
|
||
int remainingMilliseconds; | ||
if (isRetryWith) | ||
|
@@ -428,6 +445,13 @@ private static bool IsPartitionKeyRangeGone(TResponse response, Exception except | |
|| (response?.StatusCode == HttpStatusCode.Gone && response?.SubStatusCode == SubStatusCodes.PartitionKeyRangeGone); | ||
} | ||
|
||
private static bool IsGoneWithLeaseNotFound(TResponse response, Exception exception) | ||
{ | ||
return exception is LeaseLostException | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, LeaseLostException has a completely different meaning - it is thrown when the Change Feed Processor is losing the CFP lease. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this is taken care of in the original msdata PR: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have created a new exception type: |
||
|| (response?.StatusCode == HttpStatusCode.Gone && | ||
(response?.SubStatusCode == SubStatusCodes.LeaseNotFound)); | ||
} | ||
|
||
private static void ClearRequestContext(DocumentServiceRequest request) | ||
{ | ||
request.RequestContext.TargetIdentity = null; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you are missing the condition that more than one write region needs to be available (honoring preferred regions, excludedRegions, etc.) - this could be included in GlobalEndpointManager.CanUseMultipleWriteRegion, but glancing at the code I did not see this check.