Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: overhaul docker container termination signals #192

Merged
merged 2 commits into from
Jun 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 50 additions & 28 deletions environment/docker/power.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"os"
"strings"
"syscall"
"time"

"emperror.dev/errors"
Expand Down Expand Up @@ -143,42 +142,49 @@ func (e *Environment) Stop(ctx context.Context) error {
s := e.meta.Stop
e.mu.RUnlock()

// A native "stop" as the Type field value will just skip over all of this
// logic and end up only executing the container stop command (which may or
// may not work as expected).
if s.Type == "" || s.Type == remote.ProcessStopSignal {
if s.Type == "" {
log.WithField("container_id", e.Id).Warn("no stop configuration detected for environment, using termination procedure")
}
// If the process is already offline don't switch it back to stopping. Just leave it how
// it is and continue through to the stop handling for the process.
if e.st.Load() != environment.ProcessOfflineState {
e.SetState(environment.ProcessStoppingState)
}

// Handle signal based actions
if s.Type == remote.ProcessStopSignal {
log.WithField("signal_value", s.Value).Debug("stopping server using signal")

signal := os.Kill
// Handle a few common cases, otherwise just fall through and just pass along
// the os.Kill signal to the process.
// Handle some common signals - Default to SIGKILL
signal := "SIGKILL"
switch strings.ToUpper(s.Value) {
case "SIGABRT":
signal = syscall.SIGABRT
case "SIGINT":
signal = syscall.SIGINT
signal = "SIGABRT"
case "SIGINT", "C":
signal = "SIGINT"
case "SIGTERM":
signal = syscall.SIGTERM
signal = "SIGTERM"
case "SIGKILL":
signal = "SIGKILL"
default:
log.Info("Unrecognised signal requested, defaulting to SIGKILL")
}
return e.Terminate(ctx, signal)
}

// If the process is already offline don't switch it back to stopping. Just leave it how
// it is and continue through to the stop handling for the process.
if e.st.Load() != environment.ProcessOfflineState {
e.SetState(environment.ProcessStoppingState)
return e.SignalContainer(ctx, signal)

}

// Handle command based stops
// Only attempt to send the stop command to the instance if we are actually attached to
// the instance. If we are not for some reason, just send the container stop event.
if e.IsAttached() && s.Type == remote.ProcessStopCommand {
return e.SendCommand(s.Value)
}

// Allow the stop action to run for however long it takes, similar to executing a command
// and using a different logic pathway to wait for the container to stop successfully.
if s.Type == "" {
log.WithField("container_id", e.Id).Warn("no stop configuration detected for environment, using native docker stop")
}

// Fallback to a native docker stop. As we aren't passing a signal to ContainerStop docker will
// attempt to stop the container using the default stop signal, SIGTERM, unless
// another signal was specified in the Dockerfile
//
// Using a negative timeout here will allow the container to stop gracefully,
// rather than forcefully terminating it. Value is in seconds, but -1 is
Expand Down Expand Up @@ -224,7 +230,7 @@ func (e *Environment) WaitForStop(ctx context.Context, duration time.Duration, t

doTermination := func(s string) error {
e.log().WithField("step", s).WithField("duration", duration).Warn("container stop did not complete in time, terminating process...")
return e.Terminate(ctx, os.Kill)
return e.Terminate(ctx, "SIGKILL")
}

// We pass through the timed context for this stop action so that if one of the
Expand Down Expand Up @@ -268,8 +274,8 @@ func (e *Environment) WaitForStop(ctx context.Context, duration time.Duration, t
return nil
}

// Terminate forcefully terminates the container using the signal provided.
func (e *Environment) Terminate(ctx context.Context, signal os.Signal) error {
// Sends the specified signal to the container in an attempt to stop it.
func (e *Environment) SignalContainer(ctx context.Context, signal string) error {
c, err := e.ContainerInspect(ctx)
if err != nil {
// Treat missing containers as an okay error state, means it is obviously
Expand All @@ -294,11 +300,27 @@ func (e *Environment) Terminate(ctx context.Context, signal os.Signal) error {

// We set it to stopping than offline to prevent crash detection from being triggered.
e.SetState(environment.ProcessStoppingState)
sig := strings.TrimSuffix(strings.TrimPrefix(signal.String(), "signal "), "ed")
if err := e.client.ContainerKill(ctx, e.Id, sig); err != nil && !client.IsErrNotFound(err) {
if err := e.client.ContainerKill(ctx, e.Id, signal); err != nil && !client.IsErrNotFound(err) {
return errors.WithStack(err)
}

return nil

}

// Terminate forcefully terminates the container using the signal provided.
// then sets its state to stopped.
func (e *Environment) Terminate(ctx context.Context, signal string) error {

// Send the signal to the container to kill it
if err := e.SignalContainer(ctx, signal); err != nil {
return errors.WithStack(err)
}

// We expect Terminate to instantly kill the container
// so go ahead and mark it as dead and clean up
e.SetState(environment.ProcessOfflineState)

return nil

}
3 changes: 1 addition & 2 deletions environment/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package environment

import (
"context"
"os"
"time"

"github.com/pterodactyl/wings/events"
Expand Down Expand Up @@ -72,7 +71,7 @@ type ProcessEnvironment interface {

// Terminate stops a running server instance using the provided signal. This function
// is a no-op if the server is already stopped.
Terminate(ctx context.Context, signal os.Signal) error
Terminate(ctx context.Context, signal string) error

// Destroys the environment removing any containers that were created (in Docker
// environments at least).
Expand Down
3 changes: 1 addition & 2 deletions server/power.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package server
import (
"context"
"fmt"
"os"
"time"

"emperror.dev/errors"
Expand Down Expand Up @@ -161,7 +160,7 @@ func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error

return s.Environment.Start(s.Context())
case PowerActionTerminate:
return s.Environment.Terminate(s.Context(), os.Kill)
return s.Environment.Terminate(s.Context(), "SIGKILL")
}

return errors.New("attempting to handle unknown power action")
Expand Down
Loading