diff --git a/cli/docker.go b/cli/docker.go index 9239062..eae77da 100644 --- a/cli/docker.go +++ b/cli/docker.go @@ -40,6 +40,10 @@ const ( // EnvBoxContainerName is the name of the inner user container. EnvBoxPullImageSecretEnvVar = "CODER_IMAGE_PULL_SECRET" //nolint:gosec EnvBoxContainerName = "CODER_CVM_CONTAINER_NAME" + // ExitCodeShutdownTimeout is the custom exit code used when envbox exits due to a shutdown timeout, to distinguish that case from the generic '1'. + // Docker claims exit codes 125-127 so we start at 150 to + // ensure we don't collide. + ExitCodeShutdownTimeout = 150 ) const ( @@ -103,6 +107,7 @@ var ( EnvDockerConfig = "CODER_DOCKER_CONFIG" EnvDebug = "CODER_DEBUG" EnvDisableIDMappedMount = "CODER_DISABLE_IDMAPPED_MOUNT" + EnvShutdownTimeout = "CODER_SHUTDOWN_TIMEOUT" ) var envboxPrivateMounts = map[string]struct{}{ @@ -140,6 +145,7 @@ type flags struct { cpus int memory int disableIDMappedMount bool + shutdownTimeout time.Duration // Test flags. noStartupLogs bool @@ -349,6 +355,7 @@ func dockerCmd(ch chan func() error) *cobra.Command { cliflag.IntVarP(cmd.Flags(), &flags.cpus, "cpus", "", EnvCPUs, 0, "Number of CPUs to allocate inner container. e.g. 2") cliflag.IntVarP(cmd.Flags(), &flags.memory, "memory", "", EnvMemory, 0, "Max memory to allocate to the inner container in bytes.") cliflag.BoolVarP(cmd.Flags(), &flags.disableIDMappedMount, "disable-idmapped-mount", "", EnvDisableIDMappedMount, false, "Disable idmapped mounts in sysbox. Note that you may need an alternative (e.g. shiftfs).") + cliflag.DurationVarP(cmd.Flags(), &flags.shutdownTimeout, "shutdown-timeout", "", EnvShutdownTimeout, time.Minute, "Duration after which envbox will be forcefully terminated.") // Test flags. cliflag.BoolVarP(cmd.Flags(), &flags.noStartupLogs, "no-startup-log", "", "", false, "Do not log startup logs. 
Useful for testing.") @@ -728,7 +735,14 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Docker shutdownCh <- func() error { log.Debug(ctx, "killing container", slog.F("bootstrap_pid", bootstrapPID)) - ctx, cancel := context.WithTimeout(ctx, time.Minute) + // Keep the one-minute default unless a positive custom timeout is set; a zero or negative value would expire the context immediately. + timeout := time.Minute + if flags.shutdownTimeout > 0 && flags.shutdownTimeout != time.Minute { + timeout = flags.shutdownTimeout + log.Debug(ctx, "using custom shutdown timeout", slog.F("timeout", timeout.String())) + } + + ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() // The PID returned is the PID _outside_ the container... //nolint:gosec diff --git a/cmd/envbox/main.go b/cmd/envbox/main.go index ebb77f9..995565d 100644 --- a/cmd/envbox/main.go +++ b/cmd/envbox/main.go @@ -8,6 +8,8 @@ import ( "runtime" "syscall" + "golang.org/x/xerrors" + "cdr.dev/slog" "cdr.dev/slog/sloggers/slogjson" "github.com/coder/envbox/cli" @@ -29,6 +31,10 @@ func main() { err := fn() if err != nil { log.Error(ctx, "shutdown function failed", slog.Error(err)) + // A deadline error indicates the shutdown function ran out of time; report that with a dedicated exit code. + if xerrors.Is(err, context.DeadlineExceeded) { + os.Exit(cli.ExitCodeShutdownTimeout) + } os.Exit(1) } default: diff --git a/integration/docker_test.go b/integration/docker_test.go index d043e76..90c1121 100644 --- a/integration/docker_test.go +++ b/integration/docker_test.go @@ -306,10 +306,8 @@ func TestDocker(t *testing.T) { require.Error(t, err) // Simulate a shutdown. - integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second) - - err = resource.Close() - require.NoError(t, err) + exitCode := integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second) + require.Equal(t, 0, exitCode) t.Logf("envbox %q started successfully, recreating...", resource.Container.ID) // Run the envbox container. 
@@ -326,6 +324,35 @@ func TestDocker(t *testing.T) { }) require.NoError(t, err) }) + + t.Run("ShutdownTimeout", func(t *testing.T) { + t.Parallel() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + var ( + tmpdir = integrationtest.TmpDir(t) + binds = integrationtest.DefaultBinds(t, tmpdir) + ) + + envs := []string{fmt.Sprintf("%s=%s", cli.EnvShutdownTimeout, "1s")} + + // Run envbox with a one-second shutdown timeout and a bootstrap script that traps INT/TERM and then sleeps forever. + resource := integrationtest.RunEnvbox(t, pool, &integrationtest.CreateDockerCVMConfig{ + Image: integrationtest.UbuntuImage, + Username: "root", + Envs: envs, + Binds: binds, + BootstrapScript: sigtrapForeverScript, + }) + + // Simulate a shutdown. + exitCode := integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second) + // We expect the shutdown to time out, which should result in the dedicated shutdown-timeout exit code. + require.Equal(t, cli.ExitCodeShutdownTimeout, exitCode) + }) + + } func requireSliceNoContains(t *testing.T, ss []string, els ...string) { @@ -358,6 +385,20 @@ func bindMount(src, dest string, ro bool) string { return fmt.Sprintf("%s:%s", src, dest) } +const sigtrapForeverScript = `#!/bin/bash +cleanup() { + echo "Got a signal, going to sleep!" && sleep infinity + exit 0 +} + +trap 'cleanup' INT TERM + +while true; do + echo "Working..." + sleep 1 +done +` + const sigtrapScript = `#!/bin/bash cleanup() { echo "HANDLING A SIGNAL!" && touch /home/coder/foo && echo "touched file" diff --git a/integration/integrationtest/docker.go b/integration/integrationtest/docker.go index 088952c..0b78369 100644 --- a/integration/integrationtest/docker.go +++ b/integration/integrationtest/docker.go @@ -91,7 +91,12 @@ func RunEnvbox(t *testing.T, pool *dockertest.Pool, conf *CreateDockerCVMConfig) host.CPUQuota = int64(conf.CPUs) * int64(dockerutil.DefaultCPUPeriod) }) require.NoError(t, err) - // t.Cleanup(func() { _ = pool.Purge(resource) }) + t.Cleanup(func() { + // Only delete the container if the test passes. 
+ if !t.Failed() { + _ = resource.Close() // Best-effort: a failed removal must not fail an otherwise passing test. + } + }) waitForCVM(t, pool, resource) @@ -264,7 +269,8 @@ func ExecEnvbox(t *testing.T, pool *dockertest.Pool, conf ExecConfig) ([]byte, e return buf.Bytes(), nil } -func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) { +// StopContainer signals the container with the given id, waits for it to stop, and returns its exit code; it fails the test if the container has not stopped in time. +func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) int { t.Helper() err := pool.Client.KillContainer(docker.KillContainerOptions{ @@ -283,10 +289,11 @@ func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Durat continue } - return + return cnt.State.ExitCode } t.Fatalf("timed out waiting for container %s to stop", id) + return 1 // Unreachable: t.Fatalf stops the test; present only to satisfy the compiler. } // cmdLineEnvs returns args passed to the /envbox command