Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add a 1m timeout to signal shutdown #92

Merged
merged 5 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ The environment variables can be used to configure various aspects of the inner
| `CODER_CPUS` | Dictates the number of CPUs to allocate the inner container. It is recommended to set this using the Kubernetes [Downward API](https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/#use-container-fields-as-values-for-environment-variables). | false |
| `CODER_MEMORY` | Dictates the max memory (in bytes) to allocate the inner container. It is recommended to set this using the Kubernetes [Downward API](https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/#use-container-fields-as-values-for-environment-variables). | false |
| `CODER_DISABLE_IDMAPPED_MOUNT` | Disables idmapped mounts in sysbox. For more information, see the [Sysbox Documentation](https://github.com/nestybox/sysbox/blob/master/docs/user-guide/configuration.md#disabling-id-mapped-mounts-on-sysbox). | false |
| `CODER_SHUTDOWN_TIMEOUT` | Configure a custom shutdown timeout to wait for the boostrap command to exit. Defaults to 1 minute. | false |

## Coder Template

Expand Down
36 changes: 28 additions & 8 deletions cli/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"sort"
"strconv"
"strings"
"time"

dockertypes "github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
Expand All @@ -39,6 +40,10 @@ const (
// EnvBoxContainerName is the name of the inner user container.
EnvBoxPullImageSecretEnvVar = "CODER_IMAGE_PULL_SECRET" //nolint:gosec
EnvBoxContainerName = "CODER_CVM_CONTAINER_NAME"
// We define a custom exit code to distinguish from the generic '1' when envbox exits due to a shutdown timeout.
// Docker claims exit codes 125-127 so we start at 150 to
// ensure we don't collide.
ExitCodeShutdownTimeout = 150
)

const (
Expand Down Expand Up @@ -77,8 +82,9 @@ const (
// with UID/GID 1000 will be mapped to `UserNamespaceOffset` + 1000
// on the host. Changing this value will result in improper mappings
// on existing containers.
UserNamespaceOffset = 100000
devDir = "/dev"
UserNamespaceOffset = 100000
devDir = "/dev"
defaultShutdownTimeout = time.Minute
)

var (
Expand All @@ -102,6 +108,7 @@ var (
EnvDockerConfig = "CODER_DOCKER_CONFIG"
EnvDebug = "CODER_DEBUG"
EnvDisableIDMappedMount = "CODER_DISABLE_IDMAPPED_MOUNT"
EnvShutdownTimeout = "CODER_SHUTDOWN_TIMEOUT"
)

var envboxPrivateMounts = map[string]struct{}{
Expand Down Expand Up @@ -139,6 +146,7 @@ type flags struct {
cpus int
memory int
disableIDMappedMount bool
shutdownTimeout time.Duration

// Test flags.
noStartupLogs bool
Expand Down Expand Up @@ -348,6 +356,7 @@ func dockerCmd(ch chan func() error) *cobra.Command {
cliflag.IntVarP(cmd.Flags(), &flags.cpus, "cpus", "", EnvCPUs, 0, "Number of CPUs to allocate inner container. e.g. 2")
cliflag.IntVarP(cmd.Flags(), &flags.memory, "memory", "", EnvMemory, 0, "Max memory to allocate to the inner container in bytes.")
cliflag.BoolVarP(cmd.Flags(), &flags.disableIDMappedMount, "disable-idmapped-mount", "", EnvDisableIDMappedMount, false, "Disable idmapped mounts in sysbox. Note that you may need an alternative (e.g. shiftfs).")
cliflag.DurationVarP(cmd.Flags(), &flags.shutdownTimeout, "shutdown-timeout", "", EnvShutdownTimeout, defaultShutdownTimeout, "Duration after which envbox will be forcefully terminated.")

// Test flags.
cliflag.BoolVarP(cmd.Flags(), &flags.noStartupLogs, "no-startup-log", "", "", false, "Do not log startup logs. Useful for testing.")
Expand Down Expand Up @@ -724,27 +733,38 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Docker
return xerrors.Errorf("exec inspect: %w", err)
}

shutdownCh <- func() error {
log.Debug(ctx, "killing container", slog.F("bootstrap_pid", bootstrapPID))
shutdownCh <- killBootstrapCmd(ctx, log, bootstrapPID, bootstrapExec.ID, client, flags.shutdownTimeout)

return nil
}

// KillBootstrapCmd is the command we run when we receive a signal
// to kill the envbox container.
func killBootstrapCmd(ctx context.Context, log slog.Logger, pid int, execID string, client dockerutil.DockerClient, timeout time.Duration) func() error {
return func() error {
log.Debug(ctx, "killing container",
slog.F("bootstrap_pid", pid),
slog.F("timeout", timeout.String()),
)

ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
// The PID returned is the PID _outside_ the container...
//nolint:gosec
out, err := exec.Command("kill", "-TERM", strconv.Itoa(bootstrapPID)).CombinedOutput()
out, err := exec.CommandContext(ctx, "kill", "-TERM", strconv.Itoa(pid)).CombinedOutput()
if err != nil {
return xerrors.Errorf("kill bootstrap process (%s): %w", out, err)
}

log.Debug(ctx, "sent kill signal waiting for process to exit")
err = dockerutil.WaitForExit(ctx, client, bootstrapExec.ID)
err = dockerutil.WaitForExit(ctx, client, execID)
if err != nil {
return xerrors.Errorf("wait for exit: %w", err)
}

log.Debug(ctx, "bootstrap process successfully exited")
return nil
}

return nil
}

//nolint:revive
Expand Down
5 changes: 5 additions & 0 deletions cmd/envbox/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"runtime"
"syscall"

"golang.org/x/xerrors"

"cdr.dev/slog"
"cdr.dev/slog/sloggers/slogjson"
"github.com/coder/envbox/cli"
Expand All @@ -29,6 +31,9 @@ func main() {
err := fn()
if err != nil {
log.Error(ctx, "shutdown function failed", slog.Error(err))
if xerrors.Is(err, context.DeadlineExceeded) {
os.Exit(cli.ExitCodeShutdownTimeout)
}
os.Exit(1)
}
default:
Expand Down
48 changes: 44 additions & 4 deletions integration/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,10 +306,8 @@ func TestDocker(t *testing.T) {
require.Error(t, err)

// Simulate a shutdown.
integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)

err = resource.Close()
require.NoError(t, err)
exitCode := integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)
require.Equal(t, 0, exitCode)

t.Logf("envbox %q started successfully, recreating...", resource.Container.ID)
// Run the envbox container.
Expand All @@ -326,6 +324,34 @@ func TestDocker(t *testing.T) {
})
require.NoError(t, err)
})

t.Run("ShutdownTimeout", func(t *testing.T) {
t.Parallel()

pool, err := dockertest.NewPool("")
require.NoError(t, err)

var (
tmpdir = integrationtest.TmpDir(t)
binds = integrationtest.DefaultBinds(t, tmpdir)
)

envs := []string{fmt.Sprintf("%s=%s", cli.EnvShutdownTimeout, "1s")}

// Run the envbox container.
resource := integrationtest.RunEnvbox(t, pool, &integrationtest.CreateDockerCVMConfig{
Image: integrationtest.UbuntuImage,
Username: "root",
Envs: envs,
Binds: binds,
BootstrapScript: sigtrapForeverScript,
})

// Simulate a shutdown.
exitCode := integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)
// We expect it to timeout which should result in a special exit code.
require.Equal(t, cli.ExitCodeShutdownTimeout, exitCode)
})
}

func requireSliceNoContains(t *testing.T, ss []string, els ...string) {
Expand Down Expand Up @@ -358,6 +384,20 @@ func bindMount(src, dest string, ro bool) string {
return fmt.Sprintf("%s:%s", src, dest)
}

const sigtrapForeverScript = `#!/bin/bash
cleanup() {
echo "Got a signal, going to sleep!" && sleep infinity
exit 0
}

trap 'cleanup' INT TERM

while true; do
echo "Working..."
sleep 1
done
`

const sigtrapScript = `#!/bin/bash
cleanup() {
echo "HANDLING A SIGNAL!" && touch /home/coder/foo && echo "touched file"
Expand Down
12 changes: 9 additions & 3 deletions integration/integrationtest/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,12 @@ func RunEnvbox(t *testing.T, pool *dockertest.Pool, conf *CreateDockerCVMConfig)
host.CPUQuota = int64(conf.CPUs) * int64(dockerutil.DefaultCPUPeriod)
})
require.NoError(t, err)
// t.Cleanup(func() { _ = pool.Purge(resource) })
t.Cleanup(func() {
// Only delete the container if the test passes.
if !t.Failed() {
resource.Close()
}
})

waitForCVM(t, pool, resource)

Expand Down Expand Up @@ -264,7 +269,7 @@ func ExecEnvbox(t *testing.T, pool *dockertest.Pool, conf ExecConfig) ([]byte, e
return buf.Bytes(), nil
}

func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) {
func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) int {
t.Helper()

err := pool.Client.KillContainer(docker.KillContainerOptions{
Expand All @@ -283,10 +288,11 @@ func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Durat
continue
}

return
return cnt.State.ExitCode
}

t.Fatalf("timed out waiting for container %s to stop", id)
return 1
}

// cmdLineEnvs returns args passed to the /envbox command
Expand Down
Loading