Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow setting shutdown timeout for primary and audit instances #4822

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@
"MaximumConcurrencyLevel": null,
"ServiceControlQueueAddress": "Particular.ServiceControl",
"TimeToRestartAuditIngestionAfterFailure": "00:01:00",
"EnableFullTextSearchOnBodies": true
"EnableFullTextSearchOnBodies": true,
"ShutdownTimeout": "00:02:00"
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public static void AddServiceControlAudit(this IHostApplicationBuilder builder,
var transportCustomization = TransportFactory.Create(transportSettings);
transportCustomization.AddTransportForAudit(services, transportSettings);

services.Configure<HostOptions>(options => options.ShutdownTimeout = TimeSpan.FromSeconds(30));
services.Configure<HostOptions>(options => options.ShutdownTimeout = settings.ShutdownTimeout);

services.AddSingleton(settings);
services.AddSingleton<EndpointInstanceMonitoring>();
Expand Down
6 changes: 6 additions & 0 deletions src/ServiceControl.Audit/Infrastructure/Settings/Settings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ public Settings(string transportType = null, string persisterType = null, Loggin
ServiceControlQueueAddress = SettingsReader.Read<string>(SettingsRootNamespace, "ServiceControlQueueAddress");
TimeToRestartAuditIngestionAfterFailure = GetTimeToRestartAuditIngestionAfterFailure();
EnableFullTextSearchOnBodies = SettingsReader.Read(SettingsRootNamespace, "EnableFullTextSearchOnBodies", true);
ShutdownTimeout = SettingsReader.Read(SettingsRootNamespace, "ShutdownTimeout", ShutdownTimeout);

AssemblyLoadContextResolver = static assemblyPath => new PluginAssemblyLoadContext(assemblyPath);
}
Expand Down Expand Up @@ -152,6 +153,11 @@ public int MaxBodySizeToStore

public bool EnableFullTextSearchOnBodies { get; set; }

// Windows services allow a maximum of 125 seconds when stopping a service.
// When shutting down or restarting the OS we have no control over the
// shutdown timeout
public TimeSpan ShutdownTimeout { get; set; } = TimeSpan.FromMinutes(2);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only gives 5 seconds of "cancellation" which I think is sufficient because it should be a sort of let everything go and log about it but do not delay termination


public TransportSettings ToTransportSettings()
{
var transportSettings = new TransportSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,6 @@
"MaximumConcurrencyLevel": null,
"RetryHistoryDepth": 10,
"RemoteInstances": [],
"DisableHealthChecks": false
"DisableHealthChecks": false,
"ShutdownTimeout": "00:02:00"
}
2 changes: 1 addition & 1 deletion src/ServiceControl/HostApplicationBuilderExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public static void AddServiceControl(this IHostApplicationBuilder hostBuilder, S
var transportCustomization = TransportFactory.Create(transportSettings);
transportCustomization.AddTransportForPrimary(services, transportSettings);

services.Configure<HostOptions>(options => options.ShutdownTimeout = TimeSpan.FromSeconds(30));
services.Configure<HostOptions>(options => options.ShutdownTimeout = settings.ShutdownTimeout);
services.AddSingleton<IDomainEvents, DomainEvents>();

services.AddSingleton<MessageStreamerHub>();
Expand Down
5 changes: 5 additions & 0 deletions src/ServiceControl/Infrastructure/Settings/Settings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public Settings(
TimeToRestartErrorIngestionAfterFailure = GetTimeToRestartErrorIngestionAfterFailure();
DisableExternalIntegrationsPublishing = SettingsReader.Read(SettingsRootNamespace, "DisableExternalIntegrationsPublishing", false);
TrackInstancesInitialValue = SettingsReader.Read(SettingsRootNamespace, "TrackInstancesInitialValue", true);
ShutdownTimeout = SettingsReader.Read(SettingsRootNamespace, "ShutdownTimeout", ShutdownTimeout);
AssemblyLoadContextResolver = static assemblyPath => new PluginAssemblyLoadContext(assemblyPath);
}

Expand Down Expand Up @@ -180,6 +181,10 @@ public TimeSpan HeartbeatGracePeriod

public bool DisableHealthChecks { get; set; }

// The default value is set to the maximum allowed time by the most restrictive
// hosting platform, which Docker Linux containers.
public TimeSpan ShutdownTimeout { get; set; } = TimeSpan.FromSeconds(10);
Comment on lines +184 to +186
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

10 seconds is when we get killed, you set the timeout to allow for cancellation.

As in:

  • if we're interested in cancellation, set it to less than the host its kill duration
  • if we're not interested in cancellation, set this to a high value that gives us time to run a graceful termination

With external, I think 5 seconds is already plenty
With internal, I want to wait as long as possible

So I like your last commits, but if we want to see cancellation to take place this number should be even lower:

Suggested change
// The default value is set to the maximum allowed time by the most restrictive
// hosting platform, which Docker Linux containers.
public TimeSpan ShutdownTimeout { get; set; } = TimeSpan.FromSeconds(10);
// The default value is set to the maximum allowed time by the most restrictive
// hosting platform, which Docker Linux containers minus a few seconds
// to actually allow for cancellation and logging to take place
public TimeSpan ShutdownTimeout { get; set; } = TimeSpan.FromSeconds(5);


public string GetConnectionString()
{
var settingsValue = SettingsReader.Read<string>(SettingsRootNamespace, "ConnectionString");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ protected override void UpdateSettings()
settings.Set(ServiceControlSettings.EnableFullTextSearchOnBodies, details.EnableFullTextSearchOnBodies.ToString(), version);
settings.Set(ServiceControlSettings.RemoteInstances, RemoteInstanceConverter.ToJson(details.RemoteInstances), version);

// Windows services allow a maximum of 125 seconds when stopping a service.
// When shutting down or restarting the OS we have no control over the
// shutdown timeout. This is by the installer engine that is run _only_ on
// Windows via SCMU or PowerShell
settings.Set(ServiceControlSettings.ShutdownTimeout, "00:02:00");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not that familiar with this. This is added during an upgrade to the config?


// Retired settings
settings.RemoveIfRetired(ServiceControlSettings.AuditQueue, version);
settings.RemoveIfRetired(ServiceControlSettings.AuditLogQueue, version);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,11 @@ public static class ServiceControlSettings
Name = "ServiceControl/EnableFullTextSearchOnBodies",
SupportedFrom = new SemanticVersion(4, 17, 0)
};

public static SettingInfo ShutdownTimeout = new()
{
Name = "ServiceControl/ShutdownTimeout",
SupportedFrom = new SemanticVersion(6, 4, 0)
};
}
}
Loading