Merge pull request #23 from NVIDIA/dra-on-gke
Address issues with running DRA driver on GKE
elezar authored Nov 21, 2023
2 parents 1bdce27 + af5302d commit 1c4bf38
Showing 35 changed files with 510 additions and 88 deletions.
2 changes: 2 additions & 0 deletions cmd/nvidia-dra-plugin/cdi.go
@@ -50,6 +50,7 @@ type CDIHandler struct {
nvcdi nvcdi.Interface
registry cdiapi.Registry
driverRoot string
devRoot string
targetDriverRoot string
nvidiaCTKPath string

@@ -84,6 +85,7 @@ func NewCDIHandler(opts ...cdiOption) (*CDIHandler, error) {
nvcdilib, err := nvcdi.New(
nvcdi.WithDeviceLib(h.nvdevice),
nvcdi.WithDriverRoot(h.driverRoot),
nvcdi.WithDevRoot(h.devRoot),
nvcdi.WithLogger(h.logger),
nvcdi.WithNvmlLib(h.nvml),
nvcdi.WithMode("nvml"),
11 changes: 8 additions & 3 deletions cmd/nvidia-dra-plugin/device_state.go
@@ -22,6 +22,7 @@ import (
"sync"

"github.com/NVIDIA/go-nvlib/pkg/nvml"
"k8s.io/klog/v2"

nascrd "github.com/NVIDIA/k8s-dra-driver/api/nvidia.com/resource/gpu/nas/v1alpha1"
)
@@ -126,7 +127,8 @@ type DeviceState struct {
}

func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
-nvdevlib, err := newDeviceLib(root(config.flags.containerDriverRoot))
+containerDriverRoot := root(config.flags.containerDriverRoot)
+nvdevlib, err := newDeviceLib(containerDriverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create device library: %w", err)
}
@@ -136,12 +138,15 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
return nil, fmt.Errorf("error enumerating all possible devices: %w", err)
}

+devRoot := containerDriverRoot.getDevRoot()
+klog.Infof("using devRoot=%v", devRoot)

hostDriverRoot := config.flags.hostDriverRoot
-containerDriverRoot := config.flags.containerDriverRoot
cdi, err := NewCDIHandler(
WithNvml(nvdevlib.nvmllib),
WithDeviceLib(nvdevlib),
-WithDriverRoot(containerDriverRoot),
+WithDriverRoot(string(containerDriverRoot)),
+WithDevRoot(devRoot),
WithTargetDriverRoot(hostDriverRoot),
WithNvidiaCTKPath(config.flags.nvidiaCTKPath),
WithCDIRoot(config.flags.cdiRoot),
20 changes: 20 additions & 0 deletions cmd/nvidia-dra-plugin/find.go
@@ -18,6 +18,7 @@ package main

import (
"fmt"
"os"
"path/filepath"
)

@@ -60,6 +61,25 @@ func (r root) getNvidiaSMIPath() (string, error) {
return binaryPath, nil
}

// isDevRoot checks whether the specified root is a dev root.
// A dev root is defined as a root containing a /dev folder.
func (r root) isDevRoot() bool {
stat, err := os.Stat(filepath.Join(string(r), "dev"))
if err != nil {
return false
}
return stat.IsDir()
}

// getDevRoot returns the dev root associated with the root.
// If the root is not a dev root, this defaults to "/".
func (r root) getDevRoot() string {
if r.isDevRoot() {
return string(r)
}
return "/"
}

// findFile searches the root for a specified file.
// A number of folders can be specified to search in addition to the root itself.
// If the file represents a symlink, this is resolved and the final path is returned.
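The dev root is derived from the container driver root: the driver root is only used as the location of device nodes when it actually contains a `dev/` directory; otherwise the driver falls back to the host's `/`. A minimal shell sketch of the same check, where the `/opt/nvidia` path is only illustrative (it matches the `nvidiaDriverRoot` set by the GKE install script later in this change):

```bash
#!/bin/bash
# Sketch of the getDevRoot() fallback above; the driver root path is an example.
driver_root="/opt/nvidia"
if [ -d "${driver_root}/dev" ]; then
  dev_root="${driver_root}"   # the driver root also carries device nodes
else
  dev_root="/"                # fall back to the host /dev
fi
echo "using devRoot=${dev_root}"
```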
7 changes: 7 additions & 0 deletions cmd/nvidia-dra-plugin/options.go
@@ -31,6 +31,13 @@ func WithDriverRoot(root string) cdiOption {
}
}

// WithDevRoot provides a cdiOption to set the device root used by the 'cdi' interface.
func WithDevRoot(root string) cdiOption {
return func(c *CDIHandler) {
c.devRoot = root
}
}

// WithTargetDriverRoot provides a cdiOption to set the target driver root used by the 'cdi' interface.
func WithTargetDriverRoot(root string) cdiOption {
return func(c *CDIHandler) {
135 changes: 135 additions & 0 deletions demo/clusters/gke/create-cluster.sh
@@ -0,0 +1,135 @@
#!/bin/bash

# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

: ${PROJECT_NAME:=$(gcloud config list --format 'value(core.project)' 2>/dev/null)}

if [[ -z ${PROJECT_NAME} ]]; then
echo "Project name could not be determined"
echo "Please run 'gcloud config set project'"
exit 1
fi

CURRENT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)"
PROJECT_DIR="$(cd -- "$( dirname -- "${CURRENT_DIR}/../../../.." )" &> /dev/null && pwd)"

# We extract information from versions.mk
function from_versions_mk() {
local makevar=$1
local value=$(grep -E "^\s*${makevar}\s+[\?:]= " ${PROJECT_DIR}/versions.mk)
echo ${value##*= }
}
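# For example, a versions.mk entry such as 'DRIVER_NAME ?= k8s-dra-driver' (the
# exact value is defined in versions.mk, not here) would make
# from_versions_mk "DRIVER_NAME" echo "k8s-dra-driver".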
DRIVER_NAME=$(from_versions_mk "DRIVER_NAME")

NETWORK_NAME="${DRIVER_NAME}-net"
CLUSTER_NAME="${DRIVER_NAME}-cluster"

## Create the Network for the cluster
gcloud compute networks create "${NETWORK_NAME}" \
--quiet \
--project="${PROJECT_NAME}" \
--description=Manually\ created\ network\ for\ TMS\ DRA\ Alpha\ cluster \
--subnet-mode=auto \
--mtu=1460 \
--bgp-routing-mode=regional

## Create the cluster
gcloud container clusters create "${CLUSTER_NAME}" \
--quiet \
--enable-kubernetes-alpha \
--no-enable-autorepair \
--no-enable-autoupgrade \
--region us-west1 \
--network "${NETWORK_NAME}" \
--node-labels=nvidia.com/dra.controller=true

# Create t4 node pool
gcloud beta container node-pools create "pool-1" \
--quiet \
--project "${PROJECT_NAME}" \
--cluster "${CLUSTER_NAME}" \
--region "us-west1" \
--node-version "1.27.3-gke.100" \
--machine-type "n1-standard-8" \
--accelerator "type=nvidia-tesla-t4,count=1" \
--image-type "UBUNTU_CONTAINERD" \
--disk-type "pd-standard" \
--disk-size "100" \
--metadata disable-legacy-endpoints=true \
--scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" \
--num-nodes "2" \
--enable-autoscaling \
--min-nodes "2" \
--max-nodes "6" \
--location-policy "ANY" \
--no-enable-autoupgrade \
--no-enable-autorepair \
--max-surge-upgrade 1 \
--max-unavailable-upgrade 0 \
--node-locations "us-west1-a" \
--node-labels=gke-no-default-nvidia-gpu-device-plugin=true,nvidia.com/gpu=present,nvidia.com/dra.kubelet-plugin=true

# Create v100 node pool
gcloud beta container node-pools create "pool-2" \
--quiet \
--project "${PROJECT_NAME}" \
--cluster "${CLUSTER_NAME}" \
--region "us-west1" \
--node-version "1.27.3-gke.100" \
--machine-type "n1-standard-8" \
--accelerator "type=nvidia-tesla-v100,count=1" \
--image-type "UBUNTU_CONTAINERD" \
--disk-type "pd-standard" \
--disk-size "100" \
--metadata disable-legacy-endpoints=true \
--scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" \
--num-nodes "1" \
--enable-autoscaling \
--min-nodes "1" \
--max-nodes "6" \
--location-policy "ANY" \
--no-enable-autoupgrade \
--no-enable-autorepair \
--max-surge-upgrade 1 \
--max-unavailable-upgrade 0 \
--node-locations "us-west1-a" \
--node-labels=gke-no-default-nvidia-gpu-device-plugin=true,nvidia.com/gpu=present,nvidia.com/dra.kubelet-plugin=true

## Allow the GPU nodes access to the internet
gcloud compute routers create ${NETWORK_NAME}-nat-router \
--quiet \
--project "${PROJECT_NAME}" \
--network "${NETWORK_NAME}" \
--region "us-west1"

gcloud compute routers nats create "${NETWORK_NAME}-nat-config" \
--quiet \
--project "${PROJECT_NAME}" \
--router "${NETWORK_NAME}-nat-router" \
--nat-all-subnet-ip-ranges \
--auto-allocate-nat-external-ips \
--router-region "us-west1"

## Start using this cluster for kubectl
gcloud container clusters get-credentials "${CLUSTER_NAME}" --location="us-west1"

## Launch the nvidia-driver-installer daemonset to install the GPU drivers on any GPU nodes that come online:
kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/ubuntu/daemonset-preloaded.yaml

## Create the nvidia namespace
kubectl create namespace nvidia

## Deploy a custom daemonset that prepares a node for use with DRA
kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-dra-driver/456d097feb452cca1351817bab2ccd0782e96c9f/demo/prepare-gke-nodes-for-dra.yaml
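
Once the script completes, one possible sanity check, using only the labels applied above, is to confirm that the nodes carry the expected DRA labels:

```console
# GPU nodes from pool-1 and pool-2, labelled for the kubelet plugin
kubectl get nodes -l nvidia.com/dra.kubelet-plugin=true

# Default-pool nodes labelled to run the DRA controller
kubectl get nodes -l nvidia.com/dra.controller=true
```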
55 changes: 55 additions & 0 deletions demo/clusters/gke/delete-cluster.sh
@@ -0,0 +1,55 @@
#!/bin/bash

# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

: ${PROJECT_NAME:=$(gcloud config list --format 'value(core.project)' 2>/dev/null)}

CURRENT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)"
PROJECT_DIR="$(cd -- "$( dirname -- "${CURRENT_DIR}/../../../.." )" &> /dev/null && pwd)"

# We extract information from versions.mk
function from_versions_mk() {
local makevar=$1
local value=$(grep -E "^\s*${makevar}\s+[\?:]= " ${PROJECT_DIR}/versions.mk)
echo ${value##*= }
}
DRIVER_NAME=$(from_versions_mk "DRIVER_NAME")

NETWORK_NAME="${DRIVER_NAME}-net"
CLUSTER_NAME="${DRIVER_NAME}-cluster"

## Delete the cluster
gcloud container clusters delete "${CLUSTER_NAME}" \
--quiet \
--project "${PROJECT_NAME}" \
--region "us-west1"

## Delete the nat config
gcloud compute routers nats delete "${NETWORK_NAME}-nat-config" \
--quiet \
--project "${PROJECT_NAME}" \
--router "${NETWORK_NAME}-nat-router" \
--router-region "us-west1"

## Delete the nat router
gcloud compute routers delete ${NETWORK_NAME}-nat-router \
--quiet \
--project "${PROJECT_NAME}" \
--region "us-west1"

## Delete the network
gcloud compute networks delete "${NETWORK_NAME}" \
--quiet \
--project "${PROJECT_NAME}"
41 changes: 41 additions & 0 deletions demo/clusters/gke/install-dra-driver.sh
@@ -0,0 +1,41 @@
#!/bin/bash

# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

CURRENT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)"
PROJECT_DIR="$(cd -- "$( dirname -- "${CURRENT_DIR}/../../../.." )" &> /dev/null && pwd)"

# We extract information from versions.mk
function from_versions_mk() {
local makevar=$1
local value=$(grep -E "^\s*${makevar}\s+[\?:]= " ${PROJECT_DIR}/versions.mk)
echo ${value##*= }
}
DRIVER_NAME=$(from_versions_mk "DRIVER_NAME")

: ${IMAGE_REGISTRY:=registry.gitlab.com/nvidia/cloud-native/k8s-dra-driver/staging}
: ${IMAGE_NAME:=${DRIVER_NAME}}
: ${IMAGE_TAG:=530b16c-ubuntu20.04}

helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-driver \
--set image.repository=${IMAGE_REGISTRY}/${IMAGE_NAME} \
--set image.tag=${IMAGE_TAG} \
--set image.pullPolicy=Always \
--set controller.priorityClassName="" \
--set kubeletPlugin.priorityClassName="" \
--set nvidiaDriverRoot="/opt/nvidia" \
--set kubeletPlugin.tolerations[0].key=nvidia.com/gpu \
--set kubeletPlugin.tolerations[0].operator=Exists \
--set kubeletPlugin.tolerations[0].effect=NoSchedule
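
After the chart is installed, the release and its pods can be inspected in the `nvidia` namespace; the exact pod names depend on the chart, so this is only a quick sanity check:

```console
helm list -n nvidia
kubectl get pods -n nvidia
```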
48 changes: 48 additions & 0 deletions demo/specs/selectors/README.md
@@ -0,0 +1,48 @@
#### List the set of nodes in the cluster
```console
kubectl get nodes -A
```

#### Show the set of nodes which have GPUs available
```console
kubectl get nodeallocationstates.nas.gpu.resource.nvidia.com -A
```

#### Show the set of allocatable GPUs from each node
```console
kubectl get nodeallocationstates.nas.gpu.resource.nvidia.com -A -o=json \
| jq -r '.items[]
| "\(.metadata.name):",
(.spec.allocatableDevices[])'
```

#### Open the yaml files with the specs for the demo
```console
vi -O parameters.yaml claims.yaml pods.yaml
```

#### Create a namespace for the demo and deploy the demo pods
```console
kubectl create namespace kubecon-demo
kubectl apply -f parameters.yaml -f claims.yaml -f pods.yaml
```

#### Show the pods running
```console
kubectl get pod -n kubecon-demo
```

#### Show the set of GPUs allocated to each claim
```console
kubectl get nodeallocationstates.nas.gpu.resource.nvidia.com -A -o=json \
| jq -r '.items[]
| select(.spec.allocatedClaims)
| "\(.metadata.name):",
(.spec.allocatedClaims[])'
```

#### Show the logs of the inference and training pods
```console
kubectl logs -n kubecon-demo inference-pod
kubectl logs -n kubecon-demo training-pod
```
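
#### Clean up the demo
Assuming all of the demo objects were created in the `kubecon-demo` namespace, deleting that namespace removes them:
```console
kubectl delete namespace kubecon-demo
```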