Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use NVIDIA_CDI_HOOK_PATH instead of NVIDIA_CTK_PATH #212

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions cmd/nvidia-dra-plugin/cdi.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,16 @@ const (
)

type CDIHandler struct {
logger *logrus.Logger
nvml nvml.Interface
nvdevice nvdevice.Interface
nvcdiDevice nvcdi.Interface
nvcdiClaim nvcdi.Interface
cache *cdiapi.Cache
driverRoot string
devRoot string
targetDriverRoot string
nvidiaCTKPath string
logger *logrus.Logger
nvml nvml.Interface
nvdevice nvdevice.Interface
nvcdiDevice nvcdi.Interface
nvcdiClaim nvcdi.Interface
cache *cdiapi.Cache
driverRoot string
devRoot string
targetDriverRoot string
nvidiaCDIHookPath string

cdiRoot string
vendor string
Expand Down Expand Up @@ -103,7 +103,7 @@ func NewCDIHandler(opts ...cdiOption) (*CDIHandler, error) {
nvcdi.WithMode("nvml"),
nvcdi.WithVendor(h.vendor),
nvcdi.WithClass(h.deviceClass),
nvcdi.WithNVIDIACDIHookPath(h.nvidiaCTKPath),
nvcdi.WithNVIDIACDIHookPath(h.nvidiaCDIHookPath),
)
if err != nil {
return nil, fmt.Errorf("unable to create CDI library for devices: %w", err)
Expand All @@ -120,7 +120,7 @@ func NewCDIHandler(opts ...cdiOption) (*CDIHandler, error) {
nvcdi.WithMode("nvml"),
nvcdi.WithVendor(h.vendor),
nvcdi.WithClass(h.claimClass),
nvcdi.WithNVIDIACDIHookPath(h.nvidiaCTKPath),
nvcdi.WithNVIDIACDIHookPath(h.nvidiaCDIHookPath),
)
if err != nil {
return nil, fmt.Errorf("unable to create CDI library for claims: %w", err)
Expand Down
6 changes: 3 additions & 3 deletions cmd/nvidia-dra-plugin/cdioptions.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ func WithCDIRoot(cdiRoot string) cdiOption {
}
}

// WithNvidiaCTKPath provides an cdiOption to set the nvidia-ctk path used by the 'cdi' interface.
func WithNvidiaCTKPath(path string) cdiOption {
// WithNvidiaCDIHookPath provides a cdiOption to set the nvidia-cdi-hook path used by the 'cdi' interface.
func WithNvidiaCDIHookPath(path string) cdiOption {
return func(c *CDIHandler) {
c.nvidiaCTKPath = path
c.nvidiaCDIHookPath = path
}
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/nvidia-dra-plugin/device_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
WithDriverRoot(string(containerDriverRoot)),
WithDevRoot(devRoot),
WithTargetDriverRoot(hostDriverRoot),
WithNvidiaCTKPath(config.flags.nvidiaCTKPath),
WithNvidiaCDIHookPath(config.flags.nvidiaCDIHookPath),
WithCDIRoot(config.flags.cdiRoot),
WithVendor(cdiVendor),
)
Expand Down
12 changes: 6 additions & 6 deletions cmd/nvidia-dra-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ type Flags struct {
cdiRoot string
containerDriverRoot string
hostDriverRoot string
nvidiaCTKPath string
nvidiaCDIHookPath string
deviceClasses sets.Set[string]
}

Expand Down Expand Up @@ -108,11 +108,11 @@ func newApp() *cli.App {
EnvVars: []string{"CONTAINER_DRIVER_ROOT"},
},
&cli.StringFlag{
Name: "nvidia-ctk-path",
Value: "/usr/bin/nvidia-ctk",
Usage: "the path to use for the nvidia-ctk in the generated CDI specification. Note that this represents the path on the host.",
Destination: &flags.nvidiaCTKPath,
EnvVars: []string{"NVIDIA_CTK_PATH"},
Name: "nvidia-cdi-hook-path",
Value: "/usr/bin/nvidia-cdi-hook",
Usage: "the path to use for the nvidia-cdi-hook in the generated CDI specification. Note that this represents the path on the host.",
Destination: &flags.nvidiaCDIHookPath,
EnvVars: []string{"NVIDIA_CDI_HOOK_PATH", "NVIDIA_CTK_PATH"},
},
&cli.StringSliceFlag{
Name: "device-classes",
Expand Down
2 changes: 1 addition & 1 deletion demo/clusters/kind/install-dra-driver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ kubectl label node -l node-role.x-k8s.io/worker --overwrite nvidia.com/gpu.prese
deviceClasses=${1:-"gpu,mig,imex"}
helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-driver \
--set deviceClasses="{${deviceClasses}}" \
${NVIDIA_CTK_PATH:+--set nvidiaCtkPath=${NVIDIA_CTK_PATH}} \
${NVIDIA_CDI_HOOK_PATH:+--set nvidiaCDIHookPath=${NVIDIA_CDI_HOOK_PATH}} \
${NVIDIA_DRIVER_ROOT:+--set nvidiaDriverRoot=${NVIDIA_DRIVER_ROOT}} \
${MASK_NVIDIA_DRIVER_PARAMS:+--set maskNvidiaDriverParams=${MASK_NVIDIA_DRIVER_PARAMS}} \
--wait
Expand Down
11 changes: 5 additions & 6 deletions demo/clusters/kind/scripts/kind-cluster-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,12 @@ nodes:
# in `/etc/nvidia-container-runtime/config.toml`
- hostPath: /dev/null
containerPath: /var/run/nvidia-container-devices/cdi/runtime.nvidia.com/gpu/all
# The generated CDI specification assumes that `nvidia-ctk` is available on a
# node -- specifically for the `nvidia-ctk hook` subcommand. As a workaround,
# we mount it from the host.
# TODO: Remove this once we have a more stable solution to make `nvidia-ctk`
# The generated CDI specification assumes that `nvidia-cdi-hook` is available on
# the node. As a workaround, we mount it from the host.
# TODO: Remove this once we have a more stable solution to make `nvidia-cdi-hook`
# available on the kind nodes.
- hostPath: /usr/bin/nvidia-ctk
containerPath: /usr/bin/nvidia-ctk
- hostPath: /usr/bin/nvidia-cdi-hook
containerPath: /usr/bin/nvidia-cdi-hook
# We need to inject the fabricmanager socket to support MIG with toolkit 1.16.2
# TODO: Remove this once we have a version of the toolkit where this is not required
- hostPath: /run/nvidia-fabricmanager/socket
Expand Down
11 changes: 5 additions & 6 deletions demo/clusters/nvkind/scripts/kind-cluster-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,12 @@ nodes:
# in `/etc/nvidia-container-runtime/config.toml`
- hostPath: /dev/null
containerPath: /var/run/nvidia-container-devices/cdi/runtime.nvidia.com/gpu/{{ $gpu }}
# The generated CDI specification assumes that `nvidia-ctk` is available on a
# node -- specifically for the `nvidia-ctk hook` subcommand. As a workaround,
# we mount it from the host.
# TODO: Remove this once we have a more stable solution to make `nvidia-ctk`
# The generated CDI specification assumes that `nvidia-cdi-hook` is available on
# the node. As a workaround, we mount it from the host.
# TODO: Remove this once we have a more stable solution to make `nvidia-cdi-hook`
# available on the kind nodes.
- hostPath: /usr/bin/nvidia-ctk
containerPath: /usr/bin/nvidia-ctk
- hostPath: /usr/bin/nvidia-cdi-hook
containerPath: /usr/bin/nvidia-cdi-hook
# We need to inject the fabricmanager socket to support MIG with toolkit 1.16.2
# TODO: Remove this once we have a version of the toolkit where this is not required
- hostPath: /run/nvidia-fabricmanager/socket
Expand Down
4 changes: 2 additions & 2 deletions deployments/helm/k8s-dra-driver/templates/kubeletplugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ spec:
env:
- name: MASK_NVIDIA_DRIVER_PARAMS
value: "{{ .Values.maskNvidiaDriverParams }}"
- name: NVIDIA_CTK_PATH
value: "{{ .Values.nvidiaCtkPath }}"
- name: NVIDIA_CDI_HOOK_PATH
value: "{{ .Values.nvidiaCDIHookPath }}"
- name: NVIDIA_DRIVER_ROOT
value: "{{ .Values.nvidiaDriverRoot }}"
- name: NVIDIA_VISIBLE_DEVICES
Expand Down
4 changes: 2 additions & 2 deletions deployments/helm/k8s-dra-driver/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
# For driver installed directly on a host, a value of `/` is used.
nvidiaDriverRoot: /

# Specify the path of CTK binary (nvidia-ctk) on the host,
# Specify the path of the CDI hook binary (nvidia-cdi-hook) on the host,
# as it should appear in the generated CDI specification.
# The path depends on the system that runs on the node.
nvidiaCtkPath: /usr/bin/nvidia-ctk
nvidiaCDIHookPath: /usr/bin/nvidia-cdi-hook

nameOverride: ""
fullnameOverride: ""
Expand Down