From 7e6327bf6759cf7ac9a85d918eb8cfa23f17cf75 Mon Sep 17 00:00:00 2001 From: jh Date: Mon, 25 Nov 2024 00:39:51 +0800 Subject: [PATCH] generating creating-state.json instead of state.json during creating Signed-off-by: jh --- delete.go | 26 ++++++++++++ libcontainer/container_linux.go | 75 ++++++++++++++++++++++++++++++--- libcontainer/criu_linux.go | 2 +- libcontainer/factory_linux.go | 5 ++- libcontainer/process_linux.go | 24 ++++++++++- 5 files changed, 123 insertions(+), 9 deletions(-) diff --git a/delete.go b/delete.go index fc8133438ea..29d4533eccc 100644 --- a/delete.go +++ b/delete.go @@ -24,6 +24,29 @@ func killContainer(container *libcontainer.Container) error { return errors.New("container init still running") } +// tryDeleteCreatingState is responsible for deleting +// containers that failed during the creation process. +func tryDeleteCreatingState(context *cli.Context) error { + id := context.Args().First() + if id == "" { + return errEmptyID + } + root := context.GlobalString("root") + abs, err := filepath.Abs(root) + if err != nil { + return err + } + state, err := libcontainer.LoadCreatingState(abs) + if err != nil { + return err + } + if err := libcontainer.DestroyCreating(state, id); err != nil { + return err + } + // load creating state + return nil +} + var deleteCommand = cli.Command{ Name: "delete", Usage: "delete any resources held by the container often used with detached container", @@ -53,6 +76,9 @@ status of "ubuntu01" as "stopped" the following will delete resources held for container, err := getContainer(context) if err != nil { if errors.Is(err, libcontainer.ErrNotExist) { + if err := tryDeleteCreatingState(context); err != nil { + fmt.Fprintf(os.Stderr, "try clear creating state failed: %v", err) + } // if there was an aborted start or something of the sort then the container's directory could exist but // libcontainer does not see it because the state.json file inside that directory was never created. path := filepath.Join(context.GlobalString("root"), id) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index c02116177ad..a3b762d15ef 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -2,6 +2,7 @@ package libcontainer import ( "bytes" + "encoding/json" "errors" "fmt" "io" @@ -15,12 +16,14 @@ import ( "sync" "time" + securejoin "github.com/cyphar/filepath-securejoin" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/manager" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/dmz" "github.com/opencontainers/runc/libcontainer/intelrdt" @@ -192,7 +195,7 @@ func (c *Container) Set(config configs.Config) error { } // After config setting succeed, update config and states c.config = &config - _, err = c.updateState(nil) + _, err = c.updateState(nil, false) return err } @@ -798,18 +801,18 @@ func (c *Container) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, return notifyMemoryPressure(c.cgroupManager.Path("memory"), level) } -func (c *Container) updateState(process parentProcess) (*State, error) { +func (c *Container) updateState(process parentProcess, isCreating bool) (*State, error) { if process != nil { c.initProcess = process } state := c.currentState() - if err := c.saveState(state); err != nil { + if err := c.saveState(state, isCreating); err != nil { return nil, err } return state, nil } -func (c *Container) saveState(s *State) (retErr error) { +func (c *Container) saveState(s *State, isCreating bool) (retErr error) { tmpFile, err := os.CreateTemp(c.stateDir, "state-") if err != nil { return err @@ -831,10 +834,27 @@ func (c *Container) saveState(s *State) (retErr error) { return err } - stateFilePath := filepath.Join(c.stateDir, stateFilename) + var stateFilePath string + if isCreating { + stateFilePath = filepath.Join(c.stateDir, creatingStateFilename) + } else { + stateFilePath = filepath.Join(c.stateDir, stateFilename) + } return os.Rename(tmpFile.Name(), stateFilePath) } +func (c *Container) clearCreatingState() error { + creatingStateFilePath := filepath.Join(c.stateDir, creatingStateFilename) + err := os.Remove(creatingStateFilePath) + if err != nil { + if os.IsNotExist(err) { + logrus.Warnf("%s is not exist", creatingStateFilePath) + return nil + } + } + return err +} + func (c *Container) currentStatus() (Status, error) { if err := c.refreshState(); err != nil { return -1, err @@ -1160,3 +1180,48 @@ func requiresRootOrMappingTool(c *configs.Config) bool { } return !reflect.DeepEqual(c.GIDMappings, gidMap) } + +// LoadCreatingState loads the creating-state.json file into the State structure. +func LoadCreatingState(root string) (*State, error) { + stateFilePath, err := securejoin.SecureJoin(root, creatingStateFilename) + if err != nil { + return nil, err + } + f, err := os.Open(stateFilePath) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotExist + } + return nil, err + } + defer f.Close() + var state *State + if err := json.NewDecoder(f).Decode(&state); err != nil { + return nil, err + } + return state, nil +} + +// DestroyCreating handles the cleanup of containers that failed during +// the creation process. +func DestroyCreating(state *State, id string) error { + cm, err := manager.NewWithPaths(state.Config.Cgroups, state.CgroupPaths) + if err != nil { + return err + } + intelRdtManager := intelrdt.NewManager(&state.Config, id, state.IntelRdtPath) + + // Before the cgroup is created, the creating-state.json file cannot accurately + // record the PID of the container's init process. To ensure the cgroup can be + // deleted, the PIDs are retrieved from cgroup.procs and terminated one by one. + if err := signalAllProcesses(cm, unix.SIGKILL); err != nil { + logrus.Warn(err) + } + err = cm.Destroy() + if intelRdtManager != nil { + if ierr := intelRdtManager.Destroy(); err == nil { + err = ierr + } + } + return err +} diff --git a/libcontainer/criu_linux.go b/libcontainer/criu_linux.go index fed34e79148..9f772ddad8b 100644 --- a/libcontainer/criu_linux.go +++ b/libcontainer/criu_linux.go @@ -1146,7 +1146,7 @@ func (c *Container) criuNotifications(resp *criurpc.CriuResp, process *Process, } // create a timestamp indicating when the restored checkpoint was started c.created = time.Now().UTC() - if _, err := c.updateState(r); err != nil { + if _, err := c.updateState(r, false); err != nil { return err } if err := os.Remove(filepath.Join(c.stateDir, "checkpoint")); err != nil { diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index b13f8bf9bb3..9840729b0bd 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -17,8 +17,9 @@ import ( ) const ( - stateFilename = "state.json" - execFifoFilename = "exec.fifo" + stateFilename = "state.json" + creatingStateFilename = "creating-state.json" + execFifoFilename = "exec.fifo" ) // Create creates a new container with the given id inside a given state diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index fcbb54a3e41..0d48038d83f 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -561,6 +561,24 @@ func (p *initProcess) start() (retErr error) { } }() + // runc might receive a SIGKILL before creating the state.json, which + // would cause the runc delete command to fail. + // Generating this file in advance also introduces issues, such as: + // 1. The pid data might be inaccurate. + // 2. Other runc commands might load this incomplete state.json file. + + // Fortunately, as long as the cgroup path is known, cleanup operations + // can still be performed. To handle this, a temporary file named + // creating-state.json is generated, containing the same content as + // state.json. Once the state.json file is successfully created, the + // creating-state.json file is deleted. This ensures that other commands + // cannot use the temporary file, allowing only the runc delete command + // to access it. + _, err = p.container.updateState(p, true) + if err != nil { + return fmt.Errorf("unable to store creating state: %w", err) + } + // Do this before syncing with child so that no children can escape the // cgroup. We don't need to worry about not doing this and not being root // because we'd be using the rootless cgroup manager in that case. @@ -730,10 +748,14 @@ func (p *initProcess) start() (retErr error) { // In order to cleanup the runc-init[2:stage] by // runc-delete/stop, we should store the status before // procRun sync. - state, uerr := p.container.updateState(p) + state, uerr := p.container.updateState(p, false) if uerr != nil { return fmt.Errorf("unable to store init state: %w", uerr) } + // now delete creating-state.json + if err := p.container.clearCreatingState(); err != nil { + return fmt.Errorf("unable to remove creating state: %w", err) + } p.container.initProcessStartTime = state.InitProcessStartTime // Sync with child.