From 283cc30a435968ea8e8f99626a320e31d8a0b149 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 7 Feb 2024 23:32:48 +0100 Subject: [PATCH] Allow mps root to be specified This change allows the MPS root on the host to be specified and uses /run/nvidia/mps by default. Signed-off-by: Evan Lezar --- api/config/v1/flags.go | 3 + cmd/mps-control-daemon/mps/daemon.go | 29 +++++---- cmd/mps-control-daemon/mps/manager.go | 2 +- cmd/mps-control-daemon/mps/root.go | 59 +++++++++++++++++++ cmd/nvidia-device-plugin/main.go | 8 +++ .../templates/daemonset-device-plugin.yml | 7 ++- .../daemonset-mps-control-daemon.yml | 5 +- .../helm/nvidia-device-plugin/values.yaml | 7 +++ internal/plugin/server.go | 45 ++++++++------ 9 files changed, 126 insertions(+), 39 deletions(-) create mode 100644 cmd/mps-control-daemon/mps/root.go diff --git a/api/config/v1/flags.go b/api/config/v1/flags.go index 5b4fd61d9..ea1a8d25a 100644 --- a/api/config/v1/flags.go +++ b/api/config/v1/flags.go @@ -57,6 +57,7 @@ type Flags struct { type CommandLineFlags struct { MigStrategy *string `json:"migStrategy" yaml:"migStrategy"` FailOnInitError *bool `json:"failOnInitError" yaml:"failOnInitError"` + MpsRoot *string `json:"mpsRoot,omitempty" yaml:"mpsRoot,omitempty"` NvidiaDriverRoot *string `json:"nvidiaDriverRoot,omitempty" yaml:"nvidiaDriverRoot,omitempty"` GDSEnabled *bool `json:"gdsEnabled" yaml:"gdsEnabled"` MOFEDEnabled *bool `json:"mofedEnabled" yaml:"mofedEnabled"` @@ -116,6 +117,8 @@ func (f *Flags) UpdateFromCLIFlags(c *cli.Context, flags []cli.Flag) { updateFromCLIFlag(&f.MigStrategy, c, n) case "fail-on-init-error": updateFromCLIFlag(&f.FailOnInitError, c, n) + case "mps-root": + updateFromCLIFlag(&f.MpsRoot, c, n) case "nvidia-driver-root": updateFromCLIFlag(&f.NvidiaDriverRoot, c, n) case "gds-enabled": diff --git a/cmd/mps-control-daemon/mps/daemon.go b/cmd/mps-control-daemon/mps/daemon.go index b832ec595..b0a9e580c 100644 --- a/cmd/mps-control-daemon/mps/daemon.go +++ b/cmd/mps-control-daemon/mps/daemon.go @@ -22,7 +22,6 @@ import ( "io" "os" "os/exec" - "path/filepath" "k8s.io/klog/v2" @@ -46,14 +45,14 @@ type Daemon struct { rm rm.ResourceManager // root represents the root at which the files and folders controlled by the // daemon are created. These include the log and pipe directories. - root string + root Root } // NewDaemon creates an MPS daemon instance. -func NewDaemon(rm rm.ResourceManager) *Daemon { +func NewDaemon(rm rm.ResourceManager, root Root) *Daemon { return &Daemon{ rm: rm, - root: "/mps", + root: root, } } @@ -77,8 +76,8 @@ func (e envvars) toSlice() []string { // TODO: Set CUDA_VISIBLE_DEVICES to include only the devices for this resource type. func (d *Daemon) Envvars() envvars { return map[string]string{ - "CUDA_MPS_PIPE_DIRECTORY": d.pipeDir(), - "CUDA_MPS_LOG_DIRECTORY": d.logDir(), + "CUDA_MPS_PIPE_DIRECTORY": d.PipeDir(), + "CUDA_MPS_LOG_DIRECTORY": d.LogDir(), } } @@ -90,12 +89,12 @@ func (d *Daemon) Start() error { klog.InfoS("Staring MPS daemon", "resource", d.rm.Resource()) - pipeDir := d.pipeDir() + pipeDir := d.PipeDir() if err := os.MkdirAll(pipeDir, 0755); err != nil { return fmt.Errorf("error creating directory %v: %w", pipeDir, err) } - logDir := d.logDir() + logDir := d.LogDir() if err := os.MkdirAll(logDir, 0755); err != nil { return fmt.Errorf("error creating directory %v: %w", logDir, err) } @@ -147,20 +146,20 @@ func (d *Daemon) Stop() error { return nil } -func (d *Daemon) resourceRoot() string { - return filepath.Join(d.root, string(d.rm.Resource())) +func (d *Daemon) LogDir() string { + return d.root.LogDir(d.rm.Resource()) } -func (d *Daemon) pipeDir() string { - return filepath.Join(d.resourceRoot(), "pipe") +func (d *Daemon) PipeDir() string { + return d.root.PipeDir(d.rm.Resource()) } -func (d *Daemon) logDir() string { - return filepath.Join(d.resourceRoot(), "log") +func (d *Daemon) ShmDir() string { + return "/dev/shm" } func (d *Daemon) startedFile() string { - return filepath.Join(d.resourceRoot(), ".started") + return d.root.startedFile(d.rm.Resource()) } // AssertHealthy checks that the MPS control daemon is healthy. diff --git a/cmd/mps-control-daemon/mps/manager.go b/cmd/mps-control-daemon/mps/manager.go index 92bcabd6a..c4463411b 100644 --- a/cmd/mps-control-daemon/mps/manager.go +++ b/cmd/mps-control-daemon/mps/manager.go @@ -85,7 +85,7 @@ func (m *manager) Daemons() ([]*Daemon, error) { klog.InfoS("Resource is not shared", "resource", "resource", resourceManager.Resource()) continue } - daemon := NewDaemon(resourceManager) + daemon := NewDaemon(resourceManager, ContainerRoot) daemons = append(daemons, daemon) } diff --git a/cmd/mps-control-daemon/mps/root.go b/cmd/mps-control-daemon/mps/root.go new file mode 100644 index 000000000..90655d12e --- /dev/null +++ b/cmd/mps-control-daemon/mps/root.go @@ -0,0 +1,59 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package mps + +import ( + "path/filepath" + + spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1" +) + +const ( + ContainerRoot = Root("/mps") +) + +// Root represents an MPS root. +// This is where per-resource pipe and log directories are created. +// For containerised applications the host root is typically mounted to /mps in the container. +type Root string + +// LogDir returns the per-resource pipe dir for the specified root. +func (r Root) LogDir(resourceName spec.ResourceName) string { + return r.Path(string(resourceName), "log") +} + +// PipeDir returns the per-resource pipe dir for the specified root. +func (r Root) PipeDir(resourceName spec.ResourceName) string { + return r.Path(string(resourceName), "pipe") +} + +// ShmDir returns the shm dir associated with the root. +// Note that the shm dir is the same for all resources. +func (r Root) ShmDir(resourceName spec.ResourceName) string { + return r.Path("shm") +} + +// startedFile returns the per-resource .started file name for the specified root. +func (r Root) startedFile(resourceName spec.ResourceName) string { + return r.Path(string(resourceName), ".started") +} + +// Path returns a path relative to the MPS root. +func (r Root) Path(parts ...string) string { + pathparts := append([]string{string(r)}, parts...) + return filepath.Join(pathparts...) +} diff --git a/cmd/nvidia-device-plugin/main.go b/cmd/nvidia-device-plugin/main.go index a68df306b..423e43d20 100644 --- a/cmd/nvidia-device-plugin/main.go +++ b/cmd/nvidia-device-plugin/main.go @@ -120,6 +120,11 @@ func main() { Usage: "the path where the NVIDIA driver root is mounted in the container; used for generating CDI specifications", EnvVars: []string{"CONTAINER_DRIVER_ROOT"}, }, + &cli.StringFlag{ + Name: "mps-root", + Usage: "the path on the host where MPS-specific mounts and files are created by the MPS control daemon manager", + EnvVars: []string{"MPS_ROOT"}, + }, } err := c.Run(os.Args) @@ -148,6 +153,9 @@ func validateFlags(config *spec.Config) error { if *config.Flags.MigStrategy == spec.MigStrategyMixed { return fmt.Errorf("using --mig-strategy=mixed is not supported with MPS") } + if config.Flags.MpsRoot == nil || *config.Flags.MpsRoot == "" { + return fmt.Errorf("using MPS requires --mps-root to be specified") + } } return nil diff --git a/deployments/helm/nvidia-device-plugin/templates/daemonset-device-plugin.yml b/deployments/helm/nvidia-device-plugin/templates/daemonset-device-plugin.yml index 582be5f0f..96102cc82 100644 --- a/deployments/helm/nvidia-device-plugin/templates/daemonset-device-plugin.yml +++ b/deployments/helm/nvidia-device-plugin/templates/daemonset-device-plugin.yml @@ -136,6 +136,8 @@ spec: name: nvidia-device-plugin-ctr command: ["nvidia-device-plugin"] env: + - name: MPS_ROOT + value: "{{ .Values.mps.root }}" {{- if typeIs "string" .Values.migStrategy }} - name: MIG_STRATEGY value: "{{ .Values.migStrategy }}" @@ -215,12 +217,11 @@ spec: path: /var/lib/kubelet/device-plugins - name: mps-root hostPath: - # TODO: This should be /var/run/nvidia/mps - path: /var/lib/kubelet/device-plugins/mps + path: {{ .Values.mps.root }} type: DirectoryOrCreate - name: mps-shm hostPath: - path: /var/lib/kubelet/device-plugins/mps/shm + path: {{ .Values.mps.root }}/shm {{- if typeIs "string" .Values.nvidiaDriverRoot }} - name: driver-root hostPath: diff --git a/deployments/helm/nvidia-device-plugin/templates/daemonset-mps-control-daemon.yml b/deployments/helm/nvidia-device-plugin/templates/daemonset-mps-control-daemon.yml index c23bcc374..e79b86d49 100644 --- a/deployments/helm/nvidia-device-plugin/templates/daemonset-mps-control-daemon.yml +++ b/deployments/helm/nvidia-device-plugin/templates/daemonset-mps-control-daemon.yml @@ -194,12 +194,11 @@ spec: volumes: - name: mps-root hostPath: - # TODO: This should be /var/run/nvidia/mps - path: /var/lib/kubelet/device-plugins/mps + path: {{ .Values.mps.root }} type: DirectoryOrCreate - name: mps-shm hostPath: - path: /var/lib/kubelet/device-plugins/mps/shm + path: {{ .Values.mps.root }}/shm {{- if eq $hasConfigMap "true" }} - name: available-configs configMap: diff --git a/deployments/helm/nvidia-device-plugin/values.yaml b/deployments/helm/nvidia-device-plugin/values.yaml index 21d8d7f2f..f683775c0 100644 --- a/deployments/helm/nvidia-device-plugin/values.yaml +++ b/deployments/helm/nvidia-device-plugin/values.yaml @@ -145,3 +145,10 @@ nfd: - "0302" deviceLabelFields: - vendor + +mps: + # root specifies the location where files and folders for managing MPS will + # be created. This includes a daemon-specific /dev/shm and pipe and log + # directories. + # Pipe directories will be created at {{ mps.root }}/{{ .ResourceName }} + root: "/run/nvidia/mps" diff --git a/internal/plugin/server.go b/internal/plugin/server.go index a9f5ee687..db62f4f71 100644 --- a/internal/plugin/server.go +++ b/internal/plugin/server.go @@ -63,6 +63,9 @@ type NvidiaDevicePlugin struct { server *grpc.Server health chan *rm.Device stop chan interface{} + + mpsDaemon *mps.Daemon + mpsHostRoot mps.Root } // NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin @@ -74,6 +77,13 @@ func NewNvidiaDevicePlugin(config *spec.Config, resourceManager rm.ResourceManag pluginName := "nvidia-" + name pluginPath := filepath.Join(pluginapi.DevicePluginPath, pluginName) + var mpsDaemon *mps.Daemon + var mpsHostRoot mps.Root + if config.Sharing.SharingStrategy() != spec.SharingStrategyMPS { + mpsDaemon = mps.NewDaemon(resourceManager, mps.ContainerRoot) + mpsHostRoot = mps.Root(*config.Flags.CommandLineFlags.MpsRoot) + } + return &NvidiaDevicePlugin{ rm: resourceManager, config: config, @@ -83,6 +93,9 @@ func NewNvidiaDevicePlugin(config *spec.Config, resourceManager rm.ResourceManag cdiHandler: cdiHandler, cdiAnnotationPrefix: *config.Flags.Plugin.CDIAnnotationPrefix, + mpsDaemon: mpsDaemon, + mpsHostRoot: mpsHostRoot, + // These will be reinitialized every // time the plugin server is restarted. server: nil, @@ -148,11 +161,12 @@ func (plugin *NvidiaDevicePlugin) waitForMPSDaemon() error { if plugin.config.Sharing.SharingStrategy() != spec.SharingStrategyMPS { return nil } - // TODO: Check the started file here. + // TODO: Check the .ready file here. // TODO: Have some retry strategy here. - if err := mps.NewDaemon(plugin.rm).AssertHealthy(); err != nil { + if err := plugin.mpsDaemon.AssertHealthy(); err != nil { return fmt.Errorf("error checking MPS daemon health: %w", err) } + klog.InfoS("MPS daemon is healthy", "resource", plugin.rm.Resource()) return nil } @@ -329,7 +343,6 @@ func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*plu response := &pluginapi.ContainerAllocateResponse{ Envs: make(map[string]string), } - if plugin.deviceListStrategies.IsCDIEnabled() { responseID := uuid.New().String() if err := plugin.updateResponseForCDI(response, responseID, deviceIDs...); err != nil { @@ -361,26 +374,24 @@ func (plugin *NvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*plu // This includes per-resource pipe and log directories as well as a global daemon-specific shm // and assumes that an MPS control daemon has already been started. func (plugin NvidiaDevicePlugin) updateResponseForMPS(response *pluginapi.ContainerAllocateResponse) { - pipeDir := filepath.Join("/mps", string(plugin.rm.Resource()), "pipe") - response.Envs["CUDA_MPS_PIPE_DIRECTORY"] = pipeDir + // TODO: We should check that the deviceIDs are shared using MPS. + for k, v := range plugin.mpsDaemon.Envvars() { + response.Envs[k] = v + } + + resourceName := plugin.rm.Resource() response.Mounts = append(response.Mounts, &pluginapi.Mount{ - ContainerPath: pipeDir, - HostPath: filepath.Join("/var/lib/kubelet/device-plugins", pipeDir), + ContainerPath: plugin.mpsDaemon.PipeDir(), + HostPath: plugin.mpsHostRoot.PipeDir(resourceName), }, - ) - logDir := filepath.Join("/mps", string(plugin.rm.Resource()), "log") - response.Envs["CUDA_MPS_LOG_DIRECTORY"] = logDir - response.Mounts = append(response.Mounts, &pluginapi.Mount{ - ContainerPath: logDir, - HostPath: filepath.Join("/var/lib/kubelet/device-plugins", logDir), + ContainerPath: plugin.mpsDaemon.PipeDir(), + HostPath: plugin.mpsHostRoot.LogDir(resourceName), }, - ) - response.Mounts = append(response.Mounts, &pluginapi.Mount{ - ContainerPath: "/dev/shm", - HostPath: "/var/lib/kubelet/device-plugins/mps/shm", + ContainerPath: plugin.mpsDaemon.ShmDir(), + HostPath: plugin.mpsHostRoot.ShmDir(resourceName), }, ) }