From 5a9fee76773c94e57fdb0267f55eaacaf4a59ab1 Mon Sep 17 00:00:00 2001 From: Guy Shaibi Date: Wed, 20 Mar 2024 17:29:29 +0200 Subject: [PATCH 1/7] Annotate GPU Reservation Pods with GpuIdx --- .../handlers/pod/dedicated_gpu_pod_handler.go | 4 +- .../pod/gpu_reservation_pod_handler.go | 40 +++++++++++++++++++ .../status-updater/handlers/pod/handler.go | 5 +++ .../handlers/pod/shared_gpu_pod_handler.go | 6 +-- internal/status-updater/util/util.go | 14 +++---- 5 files changed, 54 insertions(+), 15 deletions(-) create mode 100644 internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go diff --git a/internal/status-updater/handlers/pod/dedicated_gpu_pod_handler.go b/internal/status-updater/handlers/pod/dedicated_gpu_pod_handler.go index 68de457..de24474 100644 --- a/internal/status-updater/handlers/pod/dedicated_gpu_pod_handler.go +++ b/internal/status-updater/handlers/pod/dedicated_gpu_pod_handler.go @@ -42,7 +42,7 @@ func (p *PodHandler) handleDedicatedGpuPodAddition(pod *v1.Pod, nodeTopology *to gpu.Status.AllocatedBy.Pod = pod.Name gpu.Status.AllocatedBy.Container = pod.Spec.Containers[0].Name - if pod.Namespace != constants.ReservationNs { + if !util.IsGpuReservationPod(pod) { gpu.Status.PodGpuUsageStatus[pod.UID] = calculateUsage(p.dynamicClient, pod, nodeTopology.GpuMemory) } @@ -65,7 +65,7 @@ func (p *PodHandler) handleDedicatedGpuPodUpdate(pod *v1.Pod, nodeTopology *topo gpu.Status.AllocatedBy.Pod == pod.Name && gpu.Status.AllocatedBy.Container == pod.Spec.Containers[0].Name if isGpuOccupiedByPod { - if pod.Namespace != constants.ReservationNs { + if !util.IsGpuReservationPod(pod) { gpu.Status.PodGpuUsageStatus[pod.UID] = calculateUsage(p.dynamicClient, pod, nodeTopology.GpuMemory) } diff --git a/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go b/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go new file mode 100644 index 0000000..684c225 --- /dev/null +++ b/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go @@ -0,0 +1,40 @@ +package pod + +import ( + "context" + "fmt" + + "github.com/google/uuid" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" + "github.com/run-ai/fake-gpu-operator/internal/status-updater/util" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +func (p *PodHandler) handleGpuReservationPodAddition(pod *v1.Pod) error { + if !util.IsGpuReservationPod(pod) { + return nil + } + + err := p.setReservationPodGpuIdxAnnotation(pod) + if err != nil { + return fmt.Errorf("failed to set GPU index annotation for reservation pod %s: %w", pod.Name, err) + } + + return nil +} + +func (p *PodHandler) setReservationPodGpuIdxAnnotation(pod *v1.Pod) error { + annotationKey := constants.ReservationPodGpuIdxAnnotation + annotationVal := fmt.Sprintf("GPU-%s", uuid.NewString()) + patch := []byte(fmt.Sprintf(`{"metadata": {"annotations": {"%s": "%s"}}}`, annotationKey, annotationVal)) + + _, err := p.kubeClient.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.MergePatchType, patch, metav1.PatchOptions{}) + if err != nil { + return fmt.Errorf("failed to update pod %s: %w", pod.Name, err) + } + + return nil +} diff --git a/internal/status-updater/handlers/pod/handler.go b/internal/status-updater/handlers/pod/handler.go index 609e441..7a4e5ef 100644 --- a/internal/status-updater/handlers/pod/handler.go +++ b/internal/status-updater/handlers/pod/handler.go @@ -38,6 +38,11 @@ func (p *PodHandler) HandleAdd(pod *v1.Pod) error { return 
fmt.Errorf("could not get node %s topology: %w", pod.Spec.NodeName, err) } + err = p.handleGpuReservationPodAddition(pod) + if err != nil { + return err + } + err = p.handleDedicatedGpuPodAddition(pod, nodeTopology) if err != nil { return err diff --git a/internal/status-updater/handlers/pod/shared_gpu_pod_handler.go b/internal/status-updater/handlers/pod/shared_gpu_pod_handler.go index d9262fb..cd85b50 100644 --- a/internal/status-updater/handlers/pod/shared_gpu_pod_handler.go +++ b/internal/status-updater/handlers/pod/shared_gpu_pod_handler.go @@ -15,10 +15,6 @@ import ( "k8s.io/client-go/kubernetes" ) -const ( - runaiReservationNs = constants.ReservationNs -) - func (p *PodHandler) handleSharedGpuPodAddition(pod *v1.Pod, nodeTopology *topology.NodeTopology) error { if !util.IsSharedGpuPod(pod) { return nil @@ -155,5 +151,5 @@ func getMatchingReservationPodNameByRunaiGpuGroupLabel(kubeclient kubernetes.Int } func getNodeReservationPods(kubeclient kubernetes.Interface, nodeName string) (*v1.PodList, error) { - return kubeclient.CoreV1().Pods(runaiReservationNs).List(context.TODO(), metav1.ListOptions{FieldSelector: "spec.nodeName=" + nodeName}) + return kubeclient.CoreV1().Pods(constants.ReservationNs).List(context.TODO(), metav1.ListOptions{FieldSelector: "spec.nodeName=" + nodeName}) } diff --git a/internal/status-updater/util/util.go b/internal/status-updater/util/util.go index 2b8c99e..1f0c618 100644 --- a/internal/status-updater/util/util.go +++ b/internal/status-updater/util/util.go @@ -9,9 +9,8 @@ import ( func IsSharedGpuPod(pod *v1.Pod) bool { _, runaiGpuExists := pod.Annotations[constants.GpuIdxAnnotation] _, runaiGpuGroupExists := pod.Labels[constants.GpuGroupLabel] - isReservationPod := pod.Namespace == constants.ReservationNs - return !isReservationPod && (runaiGpuExists || runaiGpuGroupExists) + return !IsGpuReservationPod(pod) && (runaiGpuExists || runaiGpuGroupExists) } func IsDedicatedGpuPod(pod *v1.Pod) bool { @@ -27,11 +26,10 @@ func IsPodTerminated(pod *v1.Pod) bool { } func IsPodScheduled(pod *v1.Pod) bool { - for _, condition := range pod.Status.Conditions { - if condition.Type == v1.PodScheduled && condition.Status == v1.ConditionTrue { - return true - } - } + // This should be checked using the pod's PodScheduled condition once https://github.com/run-ai/runai-engine/pull/174 is merged and available. + return pod.Spec.NodeName != "" +} - return false +func IsGpuReservationPod(pod *v1.Pod) bool { + return pod.Namespace == constants.ReservationNs } From bc6b882112b77c13247091834a6ba20caebc97c9 Mon Sep 17 00:00:00 2001 From: Guy Shaibi Date: Sun, 31 Mar 2024 10:07:37 +0300 Subject: [PATCH 2/7] . 
--- go.mod | 19 ++++++---- go.sum | 38 +++++++++++-------- .../handlers/pod/dedicated_gpu_pod_handler.go | 5 +++ .../pod/gpu_reservation_pod_handler.go | 28 +++++++++++--- .../status-updater/handlers/pod/handler.go | 5 --- 5 files changed, 60 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index 3c09a9e..8efc487 100644 --- a/go.mod +++ b/go.mod @@ -7,14 +7,14 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/jedib0t/go-pretty/v6 v6.3.3 github.com/mitchellh/mapstructure v1.5.0 - github.com/onsi/ginkgo/v2 v2.6.0 - github.com/onsi/gomega v1.24.1 + github.com/onsi/ginkgo/v2 v2.17.1 + github.com/onsi/gomega v1.30.0 github.com/otiai10/copy v1.7.0 github.com/prometheus/client_golang v1.14.0 github.com/prometheus/client_model v0.3.0 github.com/spf13/viper v1.14.0 github.com/tidwall/gjson v1.14.1 - golang.org/x/net v0.9.0 + golang.org/x/net v0.20.0 google.golang.org/grpc v1.56.3 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.26.0 @@ -29,7 +29,9 @@ require ( github.com/evanphx/json-patch/v5 v5.6.0 // indirect github.com/go-playground/locales v0.14.0 // indirect github.com/go-playground/universal-translator v0.18.0 // indirect + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/imdario/mergo v0.3.6 // indirect @@ -44,6 +46,7 @@ require ( github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.4.1 // indirect + golang.org/x/tools v0.17.0 // indirect gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect gopkg.in/go-playground/assert.v1 v1.2.1 // indirect @@ -59,7 +62,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/evanphx/json-patch v4.12.0+incompatible // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect - github.com/go-logr/logr v1.2.3 // indirect + github.com/go-logr/logr v1.4.1 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect github.com/go-openapi/jsonreference v0.20.0 // indirect github.com/go-openapi/swag v0.19.14 // indirect @@ -67,7 +70,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/gnostic v0.5.7-v3refs // indirect - github.com/google/go-cmp v0.5.9 // indirect + github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -85,9 +88,9 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect golang.org/x/oauth2 v0.7.0 // indirect - golang.org/x/sys v0.7.0 // indirect - golang.org/x/term v0.7.0 // indirect - golang.org/x/text v0.9.0 // indirect + golang.org/x/sys v0.16.0 // indirect + golang.org/x/term v0.16.0 // indirect + golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.3.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect diff --git a/go.sum b/go.sum index bf5f40a..9d9b7c5 100644 --- a/go.sum +++ b/go.sum @@ -131,8 +131,8 @@ github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KE github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= 
github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= -github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.2.0/go.mod h1:Qa4Bsj2Vb+FAVeAKsLD8RLQ+YRJB8YDmOAKxaBQf7Ro= github.com/go-logr/zapr v1.2.3 h1:a9vnzlIBPQBBkeaR9IuMUfmVOrQlkoC4YfPoFkX3T7A= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= @@ -152,6 +152,8 @@ github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl github.com/go-playground/validator v9.31.0+incompatible h1:UA72EPEogEnq76ehGdEDp4Mit+3FDh548oRqwVgNsHA= github.com/go-playground/validator v9.31.0+incompatible/go.mod h1:yrEkQXlcI+PugkyDjY2bRrL/UBU4f3rvrgkN3V8JEig= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= @@ -204,8 +206,8 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -224,6 +226,8 @@ github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= @@ -317,13 +321,13 @@ 
github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo/v2 v2.6.0 h1:9t9b9vRUbFq3C4qKFCGkVuq/fIHji802N1nrtkh1mNc= -github.com/onsi/ginkgo/v2 v2.6.0/go.mod h1:63DOGlLAH8+REH8jUGdL3YpCpu7JODesutUjdENfUAc= +github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= +github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.24.1 h1:KORJXNNTzJXzu4ScJWssJfJMnJ+2QJqhoQSRwNlze9E= -github.com/onsi/gomega v1.24.1/go.mod h1:3AOiACssS3/MajrniINInwbfOOtfZvplPzuRSmvt1jM= +github.com/onsi/gomega v1.30.0 h1:hvMK7xYz4D3HapigLTeGdId/NcfQx1VHMJc60ew99+8= +github.com/onsi/gomega v1.30.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= github.com/otiai10/copy v1.7.0 h1:hVoPiN+t+7d2nzzwMiDHPSOogsWAStewq3TwU05+clE= github.com/otiai10/copy v1.7.0/go.mod h1:rmRl6QPdJj6EiUqXQ/4Nn2lLXoNQjFCQbbNrxgc/t3U= github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE= @@ -543,8 +547,8 @@ golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= -golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -632,12 +636,12 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod 
h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= -golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= +golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -647,8 +651,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -709,6 +713,8 @@ golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.10-0.20220218145154-897bd77cd717/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= +golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/status-updater/handlers/pod/dedicated_gpu_pod_handler.go b/internal/status-updater/handlers/pod/dedicated_gpu_pod_handler.go index de24474..8eb6ac6 100644 --- a/internal/status-updater/handlers/pod/dedicated_gpu_pod_handler.go +++ b/internal/status-updater/handlers/pod/dedicated_gpu_pod_handler.go @@ -50,6 +50,11 @@ func (p *PodHandler) handleDedicatedGpuPodAddition(pod *v1.Pod, nodeTopology *to } } + err := p.handleGpuReservationPodAddition(pod, nodeTopology) + if err != nil { + return fmt.Errorf("failed to handle GPU reservation pod addition: %w", err) + } + return nil } diff --git a/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go b/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go index 684c225..8732325 100644 --- a/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go +++ b/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go @@ -4,8 +4,8 @@ import ( "context" "fmt" - "github.com/google/uuid" 
"github.com/run-ai/fake-gpu-operator/internal/common/constants" + "github.com/run-ai/fake-gpu-operator/internal/common/topology" "github.com/run-ai/fake-gpu-operator/internal/status-updater/util" v1 "k8s.io/api/core/v1" @@ -13,12 +13,12 @@ import ( "k8s.io/apimachinery/pkg/types" ) -func (p *PodHandler) handleGpuReservationPodAddition(pod *v1.Pod) error { +func (p *PodHandler) handleGpuReservationPodAddition(pod *v1.Pod, nodeTopology *topology.NodeTopology) error { if !util.IsGpuReservationPod(pod) { return nil } - err := p.setReservationPodGpuIdxAnnotation(pod) + err := p.setReservationPodGpuIdxAnnotation(pod, nodeTopology) if err != nil { return fmt.Errorf("failed to set GPU index annotation for reservation pod %s: %w", pod.Name, err) } @@ -26,15 +26,31 @@ func (p *PodHandler) handleGpuReservationPodAddition(pod *v1.Pod) error { return nil } -func (p *PodHandler) setReservationPodGpuIdxAnnotation(pod *v1.Pod) error { +func (p *PodHandler) setReservationPodGpuIdxAnnotation(pod *v1.Pod, nodeTopology *topology.NodeTopology) error { + // Find the GPU allocated by the pod + allocatedGpuID, err := findPodGpuID(pod, nodeTopology) + if err != nil { + return fmt.Errorf("failed to find GPU allocated by pod %s: %w", pod.Name, err) + } + annotationKey := constants.ReservationPodGpuIdxAnnotation - annotationVal := fmt.Sprintf("GPU-%s", uuid.NewString()) + annotationVal := allocatedGpuID patch := []byte(fmt.Sprintf(`{"metadata": {"annotations": {"%s": "%s"}}}`, annotationKey, annotationVal)) - _, err := p.kubeClient.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.MergePatchType, patch, metav1.PatchOptions{}) + _, err = p.kubeClient.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.MergePatchType, patch, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("failed to update pod %s: %w", pod.Name, err) } return nil } + +func findPodGpuID(pod *v1.Pod, nodeTopology *topology.NodeTopology) (string, error) { + for idx := range nodeTopology.Gpus { + gpu := &nodeTopology.Gpus[idx] + if gpu.Status.AllocatedBy.Pod == pod.Name { + return gpu.ID, nil + } + } + return "", fmt.Errorf("pod %s does not have a GPU allocated", pod.Name) +} diff --git a/internal/status-updater/handlers/pod/handler.go b/internal/status-updater/handlers/pod/handler.go index 7a4e5ef..609e441 100644 --- a/internal/status-updater/handlers/pod/handler.go +++ b/internal/status-updater/handlers/pod/handler.go @@ -38,11 +38,6 @@ func (p *PodHandler) HandleAdd(pod *v1.Pod) error { return fmt.Errorf("could not get node %s topology: %w", pod.Spec.NodeName, err) } - err = p.handleGpuReservationPodAddition(pod) - if err != nil { - return err - } - err = p.handleDedicatedGpuPodAddition(pod, nodeTopology) if err != nil { return err From 0112865a322f6e40a7c7cf38d57feed1de23caaa Mon Sep 17 00:00:00 2001 From: Guy Shaibi Date: Sun, 31 Mar 2024 12:30:17 +0300 Subject: [PATCH 3/7] . 
--- design/samples/2.17/fractional_pod.yaml | 228 ++++++++++++++++++ .../2.17/fractional_pod_reservation.yaml | 124 ++++++++++ internal/status-updater/app_test.go | 224 ++++++++--------- .../pod/gpu_reservation_pod_handler.go | 6 + .../handlers/pod/shared_gpu_pod_handler.go | 1 - 5 files changed, 470 insertions(+), 113 deletions(-) create mode 100644 design/samples/2.17/fractional_pod.yaml create mode 100644 design/samples/2.17/fractional_pod_reservation.yaml diff --git a/design/samples/2.17/fractional_pod.yaml b/design/samples/2.17/fractional_pod.yaml new file mode 100644 index 0000000..b2481bd --- /dev/null +++ b/design/samples/2.17/fractional_pod.yaml @@ -0,0 +1,228 @@ +apiVersion: v1 +kind: Pod +metadata: + annotations: + clusterId: d69dff42-4134-41d9-90fc-1c39505cb774 + cni.projectcalico.org/containerID: 4357ed00f685ddcfafb9b551fbad81f6ad9a39d74f0158fe22b6e937dad415df + cni.projectcalico.org/podIP: 100.122.249.152/32 + cni.projectcalico.org/podIPs: 100.122.249.152/32 + gpu-fraction: "0.5" + pod-group-name: pg-frac-1-0-2237ca39-cac0-4601-b658-8a3c5f406a4f + received-resource-type: Fraction + runai-allocated-gpu-memory: "7680" + runai-allocated-gpus: "0.5" + runai-allocated-mig-gpus: "0" + runai-calculated-status: Running + runai-job-id: 2237ca39-cac0-4601-b658-8a3c5f406a4f + runai-node: i-0b498db53280b86a6 + runai/shared-gpu-configmap: frac-1-ns26p7c-runai-sh-gpu + user: test@run.ai + workloadId: 027397ab-4c3c-45f7-87d0-8b3bae4ded65 + creationTimestamp: "2024-03-31T09:03:22Z" + generateName: frac-1- + labels: + app: runaijob + controller-uid: 2237ca39-cac0-4601-b658-8a3c5f406a4f + createdBy: RunaiJob + project: pa + release: frac-1 + run.ai/top-owner-uid: 027397ab-4c3c-45f7-87d0-8b3bae4ded65 + runai-gpu-group: df7c0dd3-9795-443c-85b9-acbf49c8fb6b + runai/pod-index: 0-0 + workloadKind: TrainingWorkload + workloadName: frac-1 + name: frac-1-0-0 + namespace: runai-pa + ownerReferences: + - apiVersion: run.ai/v1 + blockOwnerDeletion: true + controller: true + kind: RunaiJob + name: frac-1 + uid: 2237ca39-cac0-4601-b658-8a3c5f406a4f + resourceVersion: "10748" + uid: a801b3c7-b9be-4830-821c-2456cad2234f +spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: runai/node-pool + operator: DoesNotExist + containers: + - env: + - name: RUNAI_JOB_NAME + value: frac-1 + - name: RUNAI_PROJECT + value: pa + - name: WANDB_NOTES + value: https://shaibi-real.runailabs.com/trainings?columnFilter=[{"term":"frac-1","name":"name"}]&clusterId=d69dff42-4134-41d9-90fc-1c39505cb774 + - name: POD_INDEX + value: "0" + - name: RUNAI_GPU_MEMORY_REQUEST + value: "0.50" + - name: RUNAI_GPU_MEMORY_LIMIT + value: "0.50" + - name: NVIDIA_VISIBLE_DEVICES + valueFrom: + configMapKeyRef: + key: RUNAI-VISIBLE-DEVICES + name: frac-1-ns26p7c-runai-sh-gpu-0 + - name: RUNAI_NUM_OF_GPUS + valueFrom: + configMapKeyRef: + key: RUNAI_NUM_OF_GPUS + name: frac-1-ns26p7c-runai-sh-gpu-0 + - name: jobUUID + value: 2237ca39-cac0-4601-b658-8a3c5f406a4f + - name: JOB_UUID + value: 2237ca39-cac0-4601-b658-8a3c5f406a4f + - name: jobName + value: frac-1 + - name: JOB_NAME + value: frac-1 + - name: reporterGatewayURL + value: runai-prometheus-pushgateway.runai.svc.cluster.local:9091 + - name: REPORTER_GATEWAY_URL + value: runai-prometheus-pushgateway.runai.svc.cluster.local:9091 + - name: podUUID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: POD_UUID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: 
NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + envFrom: + - configMapRef: + name: frac-1-ns26p7c-runai-sh-gpu-0-evar + optional: false + image: gshaibi/gpu-burn + imagePullPolicy: IfNotPresent + name: frac-1 + resources: + requests: + cpu: 100m + memory: 100M + securityContext: + allowPrivilegeEscalation: false + capabilities: {} + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-dfphn + readOnly: true + - mountPath: /etc/ld.so.preload + name: frac-1-ns26p7c-runai-sh-gpu-0-vol + readOnly: true + subPath: ld.so.preload-key + - mountPath: /etc/runai.d/memory + name: frac-1-ns26p7c-runai-sh-gpu-0-vol + readOnly: true + subPath: config + - mountPath: /etc/runai.d/pod_uuid + name: frac-1-ns26p7c-runai-sh-gpu-0-vol + readOnly: true + subPath: pod-uuid + - mountPath: /runai/shared + name: runai-shared-directory + readOnly: true + - mountPath: /etc/runai.d/route + name: frac-1-ns26p7c-runai-sh-gpu-0-vol + readOnly: true + subPath: route + dnsPolicy: ClusterFirst + enableServiceLinks: true + nodeName: i-0b498db53280b86a6 + preemptionPolicy: PreemptLowerPriority + priority: 0 + restartPolicy: Never + schedulerName: runai-scheduler + securityContext: {} + serviceAccount: default + serviceAccountName: default + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 + volumes: + - name: kube-api-access-dfphn + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + expirationSeconds: 3607 + path: token + - configMap: + items: + - key: ca.crt + path: ca.crt + name: kube-root-ca.crt + - downwardAPI: + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + path: namespace + - configMap: + defaultMode: 420 + name: frac-1-ns26p7c-runai-sh-gpu-0 + name: frac-1-ns26p7c-runai-sh-gpu-0-vol + - hostPath: + path: /var/lib/runai/shared + type: DirectoryOrCreate + name: runai-shared-directory +status: + conditions: + - lastProbeTime: null + lastTransitionTime: "2024-03-31T09:03:27Z" + status: "True" + type: Initialized + - lastProbeTime: null + lastTransitionTime: "2024-03-31T09:03:51Z" + status: "True" + type: Ready + - lastProbeTime: null + lastTransitionTime: "2024-03-31T09:03:51Z" + status: "True" + type: ContainersReady + - lastProbeTime: null + lastTransitionTime: "2024-03-31T09:03:27Z" + status: "True" + type: PodScheduled + containerStatuses: + - containerID: containerd://4205608c75216bfe3d3a71ea7301f8bc041acba92673e033fc87be6d91867dc6 + image: docker.io/gshaibi/gpu-burn:latest + imageID: docker.io/gshaibi/gpu-burn@sha256:ed07993b0581228c2bd7113fae0ed214549547f0fa91ba50165bc2473cfaf979 + lastState: {} + name: frac-1 + ready: true + restartCount: 0 + started: true + state: + running: + startedAt: "2024-03-31T09:03:51Z" + hostIP: 172.20.62.77 + phase: Running + podIP: 100.122.249.152 + podIPs: + - ip: 100.122.249.152 + qosClass: Burstable + startTime: "2024-03-31T09:03:27Z" diff --git a/design/samples/2.17/fractional_pod_reservation.yaml b/design/samples/2.17/fractional_pod_reservation.yaml new file mode 100644 index 0000000..708a102 --- /dev/null +++ b/design/samples/2.17/fractional_pod_reservation.yaml @@ -0,0 +1,124 @@ +apiVersion: v1 +kind: Pod +metadata: + annotations: + 
cni.projectcalico.org/containerID: 75affaf027829643896b3de5699d15fedb291f4f7efac6f00b0d0bbe9a2dd65a + cni.projectcalico.org/podIP: 100.122.249.151/32 + cni.projectcalico.org/podIPs: 100.122.249.151/32 + pod-group-name: pg-runai-reservation-gpu-i-0b498db53280b86a6-fzdhl-3b47e794-97f0-4824-b7d5-bb44c122039e + run.ai/reserve_for_gpu_index: GPU-8983c66a-23df-e63b-4c2f-afcae9ec79b3 + runai-job-id: 3b47e794-97f0-4824-b7d5-bb44c122039e + creationTimestamp: "2024-03-31T09:03:25Z" + labels: + app: runai-reservation + app.runai.resource.reservation: runai-reservation-gpu + runai-gpu-group: df7c0dd3-9795-443c-85b9-acbf49c8fb6b + name: runai-reservation-gpu-i-0b498db53280b86a6-fzdhl + namespace: runai-reservation + resourceVersion: "10625" + uid: 3b47e794-97f0-4824-b7d5-bb44c122039e +spec: + containers: + - env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + image: gcr.io/run-ai-prod/resource-reservation:v3.5.0 + imagePullPolicy: IfNotPresent + name: runai-reservation + resources: + limits: + nvidia.com/gpu: "1" + requests: + nvidia.com/gpu: "1" + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-fnjgk + readOnly: true + dnsPolicy: ClusterFirst + enableServiceLinks: true + imagePullSecrets: + - name: runai-reg-creds + nodeName: i-0b498db53280b86a6 + preemptionPolicy: PreemptLowerPriority + priority: 0 + restartPolicy: Always + schedulerName: runai-scheduler + securityContext: {} + serviceAccount: runai-reservation-engine + serviceAccountName: runai-reservation-engine + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 + volumes: + - name: kube-api-access-fnjgk + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + expirationSeconds: 3607 + path: token + - configMap: + items: + - key: ca.crt + path: ca.crt + name: kube-root-ca.crt + - downwardAPI: + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + path: namespace +status: + conditions: + - lastProbeTime: null + lastTransitionTime: "2024-03-31T09:03:25Z" + status: "True" + type: Initialized + - lastProbeTime: null + lastTransitionTime: "2024-03-31T09:03:27Z" + status: "True" + type: Ready + - lastProbeTime: null + lastTransitionTime: "2024-03-31T09:03:27Z" + status: "True" + type: ContainersReady + - lastProbeTime: null + lastTransitionTime: "2024-03-31T09:03:25Z" + status: "True" + type: PodScheduled + containerStatuses: + - containerID: containerd://1063439dc8e82d20ef89a97ad9567d40d59d0d270ac5b8d4cab7f49a474e4398 + image: gcr.io/run-ai-prod/resource-reservation:v3.5.0 + imageID: gcr.io/run-ai-prod/resource-reservation@sha256:add1db641829508bbd1e74a7e757348159bc99b67844fc656acc1e795872d0a6 + lastState: {} + name: runai-reservation + ready: true + restartCount: 0 + started: true + state: + running: + startedAt: "2024-03-31T09:03:27Z" + hostIP: 172.20.62.77 + phase: Running + podIP: 100.122.249.151 + podIPs: + - ip: 100.122.249.151 + qosClass: BestEffort + startTime: "2024-03-31T09:03:25Z" diff --git a/internal/status-updater/app_test.go b/internal/status-updater/app_test.go index d744f54..63db9ac 100644 --- a/internal/status-updater/app_test.go +++ 
b/internal/status-updater/app_test.go @@ -252,153 +252,153 @@ var _ = Describe("StatusUpdater", func() { }) }) }) + }) - When("informed of a shared GPU pod", func() { - var ( - expectedTopology *topology.NodeTopology - ) - - var ( - expectTopologyToBeUpdatedWithReservationPod = func() { - expectedTopology = createTopology(nodeGpuCount, node) - expectedTopology.Gpus[0].Status.AllocatedBy.Pod = reservationPodName - expectedTopology.Gpus[0].Status.AllocatedBy.Container = reservationPodContainerName - expectedTopology.Gpus[0].Status.AllocatedBy.Namespace = constants.ReservationNs - Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) - } - expectTopologyToBeUpdatedWithSharedGpuPod = func() { - expectedTopology.Gpus[0].Status.PodGpuUsageStatus = topology.PodGpuUsageStatusMap{ - podUID: topology.GpuUsageStatus{ - Utilization: topology.Range{ - Min: 80, - Max: 100, - }, - FbUsed: int(float64(expectedTopology.GpuMemory) * 0.5), + When("informed of a shared GPU pod", func() { + var ( + expectedTopology *topology.NodeTopology + ) + + var ( + expectTopologyToBeUpdatedWithReservationPod = func() { + expectedTopology = createTopology(nodeGpuCount, node) + expectedTopology.Gpus[0].Status.AllocatedBy.Pod = reservationPodName + expectedTopology.Gpus[0].Status.AllocatedBy.Container = reservationPodContainerName + expectedTopology.Gpus[0].Status.AllocatedBy.Namespace = constants.ReservationNs + Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) + } + expectTopologyToBeUpdatedWithSharedGpuPod = func() { + expectedTopology.Gpus[0].Status.PodGpuUsageStatus = topology.PodGpuUsageStatusMap{ + podUID: topology.GpuUsageStatus{ + Utilization: topology.Range{ + Min: 80, + Max: 100, }, - } - - Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) + FbUsed: int(float64(expectedTopology.GpuMemory) * 0.5), + }, } - ) - Context("with a runai-gpu annotation", func() { - It("should update the cluster topology at its reservation pod location", func() { - gpuIdx := 0 + Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) + } + ) - // Test reservation pod handling - reservationPod := createGpuIdxReservationPod(gpuIdx) - _, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Create(context.TODO(), reservationPod, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + Context("with a runai-gpu annotation", func() { + It("should update the cluster topology at its reservation pod location", func() { + gpuIdx := 0 - expectTopologyToBeUpdatedWithReservationPod() + // Test reservation pod handling + reservationPod := createGpuIdxReservationPod(gpuIdx) + _, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Create(context.TODO(), reservationPod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) - // Test shared gpu pod handling - pod := createGpuIdxSharedGpuPod(gpuIdx, 0.5) - _, err = kubeclient.CoreV1().Pods(podNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + expectTopologyToBeUpdatedWithReservationPod() - podGroup := createPodGroup("train") - _, err = dynamicClient.Resource(schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v1", Resource: "podgroups"}).Namespace(podNamespace).Create(context.TODO(), podGroup, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + // Test shared gpu pod handling + pod := createGpuIdxSharedGpuPod(gpuIdx, 0.5) + _, err = 
kubeclient.CoreV1().Pods(podNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) - expectTopologyToBeUpdatedWithSharedGpuPod() - }) + podGroup := createPodGroup("train") + _, err = dynamicClient.Resource(schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v1", Resource: "podgroups"}).Namespace(podNamespace).Create(context.TODO(), podGroup, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + expectTopologyToBeUpdatedWithSharedGpuPod() }) + }) - Context("with a runai-gpu-group label", func() { - It("should update the cluster topology at its reservation pod location", func() { - gpuGroup := "group1" + Context("with a runai-gpu-group label", func() { + It("should update the cluster topology at its reservation pod location", func() { + gpuGroup := "group1" - // Test reservation pod handling - reservationPod := createGpuGroupReservationPod(gpuGroup) - _, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Create(context.TODO(), reservationPod, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + // Test reservation pod handling + reservationPod := createGpuGroupReservationPod(gpuGroup) + _, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Create(context.TODO(), reservationPod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) - expectTopologyToBeUpdatedWithReservationPod() + expectTopologyToBeUpdatedWithReservationPod() - // Test shared gpu pod handling - pod := createGpuGroupSharedGpuPod(gpuGroup, 0.5) - _, err = kubeclient.CoreV1().Pods(podNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + // Test shared gpu pod handling + pod := createGpuGroupSharedGpuPod(gpuGroup, 0.5) + _, err = kubeclient.CoreV1().Pods(podNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) - podGroup := createPodGroup("train") - _, err = dynamicClient.Resource(schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v1", Resource: "podgroups"}).Namespace(podNamespace).Create(context.TODO(), podGroup, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + podGroup := createPodGroup("train") + _, err = dynamicClient.Resource(schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v1", Resource: "podgroups"}).Namespace(podNamespace).Create(context.TODO(), podGroup, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) - expectTopologyToBeUpdatedWithSharedGpuPod() - }) + expectTopologyToBeUpdatedWithSharedGpuPod() }) }) + }) - When("informed of a GPU node", func() { - It("should create a new now topology", func() { - node := &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - Labels: map[string]string{ - "nvidia.com/gpu.deploy.device-plugin": "true", - "nvidia.com/gpu.deploy.dcgm-exporter": "true", - }, + When("informed of a GPU node", func() { + It("should create a new now topology", func() { + node := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "nvidia.com/gpu.deploy.device-plugin": "true", + "nvidia.com/gpu.deploy.dcgm-exporter": "true", }, - } + }, + } - _, err := kubeclient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + _, err := kubeclient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) - Eventually(getTopologyNodeFromKube(kubeclient, node.Name)).Should(Not(BeNil())) + Eventually(getTopologyNodeFromKube(kubeclient, 
node.Name)).Should(Not(BeNil())) - baseTopology, err := getTopologyFromKube(kubeclient)() - Expect(err).ToNot(HaveOccurred()) - Expect(baseTopology).ToNot(BeNil()) + baseTopology, err := getTopologyFromKube(kubeclient)() + Expect(err).ToNot(HaveOccurred()) + Expect(baseTopology).ToNot(BeNil()) - nodeTopology, err := getTopologyNodeFromKube(kubeclient, node.Name)() - Expect(err).ToNot(HaveOccurred()) - Expect(nodeTopology).ToNot(BeNil()) + nodeTopology, err := getTopologyNodeFromKube(kubeclient, node.Name)() + Expect(err).ToNot(HaveOccurred()) + Expect(nodeTopology).ToNot(BeNil()) - Expect(nodeTopology.GpuMemory).To(Equal(baseTopology.Config.NodeAutofill.GpuMemory)) - Expect(nodeTopology.GpuProduct).To(Equal(baseTopology.Config.NodeAutofill.GpuProduct)) - Expect(nodeTopology.Gpus).To(HaveLen(baseTopology.Config.NodeAutofill.GpuCount)) - Expect(nodeTopology.MigStrategy).To(Equal(baseTopology.Config.NodeAutofill.MigStrategy)) - }) + Expect(nodeTopology.GpuMemory).To(Equal(baseTopology.Config.NodeAutofill.GpuMemory)) + Expect(nodeTopology.GpuProduct).To(Equal(baseTopology.Config.NodeAutofill.GpuProduct)) + Expect(nodeTopology.Gpus).To(HaveLen(baseTopology.Config.NodeAutofill.GpuCount)) + Expect(nodeTopology.MigStrategy).To(Equal(baseTopology.Config.NodeAutofill.MigStrategy)) }) + }) - When("informed of a node without GPU labels", func() { - It("should not add the node to the cluster topology", func() { - node := &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - }, - } + When("informed of a node without GPU labels", func() { + It("should not add the node to the cluster topology", func() { + node := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + } - _, err := kubeclient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) - Consistently(getTopologyNodeFromKubeErrorOrNil(kubeclient, node.Name)).Should(MatchError(errors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, topology.GetNodeTopologyCMName(node.Name)))) - }) + _, err := kubeclient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + Consistently(getTopologyNodeFromKubeErrorOrNil(kubeclient, node.Name)).Should(MatchError(errors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, topology.GetNodeTopologyCMName(node.Name)))) }) + }) - // When informed of a node deletion, it should remove the node from the cluster topology - When("informed of a node deletion", func() { - It("should remove the node from the cluster topology", func() { - node := &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - Labels: map[string]string{ - "nvidia.com/gpu.deploy.device-plugin": "true", - "nvidia.com/gpu.deploy.dcgm-exporter": "true", - }, + // When informed of a node deletion, it should remove the node from the cluster topology + When("informed of a node deletion", func() { + It("should remove the node from the cluster topology", func() { + node := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "nvidia.com/gpu.deploy.device-plugin": "true", + "nvidia.com/gpu.deploy.dcgm-exporter": "true", }, - } + }, + } - _, err := kubeclient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + _, err := kubeclient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) - Eventually(getTopologyNodeFromKubeErrorOrNil(kubeclient, node.Name)).Should(BeNil()) + 
Eventually(getTopologyNodeFromKubeErrorOrNil(kubeclient, node.Name)).Should(BeNil()) - err = kubeclient.CoreV1().Nodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{}) - Expect(err).ToNot(HaveOccurred()) + err = kubeclient.CoreV1().Nodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{}) + Expect(err).ToNot(HaveOccurred()) - Eventually(getTopologyNodeFromKubeErrorOrNil(kubeclient, node.Name)).Should(Not(BeNil())) - }) + Eventually(getTopologyNodeFromKubeErrorOrNil(kubeclient, node.Name)).Should(Not(BeNil())) }) }) }) diff --git a/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go b/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go index 8732325..2207192 100644 --- a/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go +++ b/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go @@ -27,6 +27,12 @@ func (p *PodHandler) handleGpuReservationPodAddition(pod *v1.Pod, nodeTopology * } func (p *PodHandler) setReservationPodGpuIdxAnnotation(pod *v1.Pod, nodeTopology *topology.NodeTopology) error { + // DEPRECATED: Prior to 2.17, the scheduler had set the GPU index annotation for the reservation pod, + // therefore we skip setting the annotation if it already exists to support backward compatibility. + if _, ok := pod.Annotations[constants.ReservationPodGpuIdxAnnotation]; ok { + return nil + } + // Find the GPU allocated by the pod allocatedGpuID, err := findPodGpuID(pod, nodeTopology) if err != nil { diff --git a/internal/status-updater/handlers/pod/shared_gpu_pod_handler.go b/internal/status-updater/handlers/pod/shared_gpu_pod_handler.go index cd85b50..ab584cd 100644 --- a/internal/status-updater/handlers/pod/shared_gpu_pod_handler.go +++ b/internal/status-updater/handlers/pod/shared_gpu_pod_handler.go @@ -47,7 +47,6 @@ func (p *PodHandler) handleSharedGpuPodDeletion(pod *v1.Pod, nodeTopology *topol } func (p *PodHandler) calculateAndSetPodGpuUsageStatus(pod *v1.Pod, nodeTopology *topology.NodeTopology) error { - reservationPodGpuIdx, err := getMatchingReservationPodGpuIdx(p.kubeClient, pod, nodeTopology) if err != nil { return err From eb8853d68fe47656fe2ebb887ece6142eb4d853b Mon Sep 17 00:00:00 2001 From: Guy Shaibi Date: Sun, 31 Mar 2024 13:41:03 +0300 Subject: [PATCH 4/7] Add tests --- internal/status-updater/app_test.go | 241 +++++++++++++++++----------- 1 file changed, 148 insertions(+), 93 deletions(-) diff --git a/internal/status-updater/app_test.go b/internal/status-updater/app_test.go index 63db9ac..7afb201 100644 --- a/internal/status-updater/app_test.go +++ b/internal/status-updater/app_test.go @@ -29,6 +29,7 @@ import ( "k8s.io/client-go/kubernetes" kfake "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/rest" + "k8s.io/utils/ptr" "github.com/run-ai/fake-gpu-operator/internal/common/app" ) @@ -137,118 +138,169 @@ var _ = Describe("StatusUpdater", func() { }) When("informed of a dedicated GPU pod", func() { - Context("creation and deletion", func() { - type testCase struct { - podGpuCount int64 - podPhase v1.PodPhase - podConditions []v1.PodCondition - workloadType string - } + Context("non-reservation pod", func() { + Context("creation and deletion", func() { + type testCase struct { + podGpuCount int64 + podPhase v1.PodPhase + podConditions []v1.PodCondition + workloadType string + } - cases := []testCase{} + cases := []testCase{} - for i := int64(1); i <= nodeGpuCount; i++ { - for _, phase := range []v1.PodPhase{v1.PodPending, v1.PodRunning, v1.PodSucceeded, v1.PodFailed, v1.PodUnknown} { - for _, 
workloadType := range []string{"build", "train", "interactive-preemptible", "inference"} { - tCase := testCase{ - podGpuCount: i, - podPhase: phase, - workloadType: workloadType, - } + for i := int64(1); i <= nodeGpuCount; i++ { + for _, phase := range []v1.PodPhase{v1.PodPending, v1.PodRunning, v1.PodSucceeded, v1.PodFailed, v1.PodUnknown} { + for _, workloadType := range []string{"build", "train", "interactive-preemptible", "inference"} { + tCase := testCase{ + podGpuCount: i, + podPhase: phase, + workloadType: workloadType, + } - if phase == v1.PodPending { // Pending pods can be unscheduled or scheduled (e.g. when scheduled but the containers are not started yet) - tCase.podConditions = []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionFalse}} - cases = append(cases, tCase) + if phase == v1.PodPending { // Pending pods can be unscheduled or scheduled (e.g. when scheduled but the containers are not started yet) + tCase.podConditions = []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionFalse}} + cases = append(cases, tCase) - tCase.podConditions = []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionTrue}} - cases = append(cases, tCase) - } else { // Non-pending pods are always scheduled - tCase.podConditions = []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionTrue}} - cases = append(cases, tCase) + tCase.podConditions = []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionTrue}} + cases = append(cases, tCase) + } else { // Non-pending pods are always scheduled + tCase.podConditions = []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionTrue}} + cases = append(cases, tCase) + } } } } - } - for _, caseDetails := range cases { - caseBaseName := fmt.Sprintf("GPU count %d, pod phase %s, workloadType: %s", caseDetails.podGpuCount, caseDetails.podPhase, caseDetails.workloadType) - caseDetails := caseDetails - It(caseBaseName, func() { - By("creating the pod") - pod := createDedicatedGpuPod(caseDetails.podGpuCount, caseDetails.podPhase, caseDetails.podConditions) - _, err := kubeclient.CoreV1().Pods(podNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) - podGroup := createPodGroup(caseDetails.workloadType) - _, err = dynamicClient.Resource(schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v1", Resource: "podgroups"}).Namespace(podNamespace).Create(context.TODO(), podGroup, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) - - expectedTopology := createTopology(nodeGpuCount, node) - isPodScheduledConditionTrue := isConditionTrue(caseDetails.podConditions, v1.PodScheduled) - - if caseDetails.podPhase == v1.PodRunning || - ((caseDetails.podPhase == v1.PodPending || caseDetails.podPhase == v1.PodUnknown) && isPodScheduledConditionTrue) { - for i := 0; i < int(caseDetails.podGpuCount); i++ { - expectedTopology.Gpus[i].Status.PodGpuUsageStatus = topology.PodGpuUsageStatusMap{ - podUID: topology.GpuUsageStatus{ - Utilization: getExpectedUtilization(caseDetails.workloadType, caseDetails.podPhase), - FbUsed: expectedTopology.GpuMemory, - UseKnativeUtilization: caseDetails.workloadType == "inference" && caseDetails.podPhase == v1.PodRunning, - }, + for _, caseDetails := range cases { + caseBaseName := fmt.Sprintf("GPU count %d, pod phase %s, workloadType: %s", caseDetails.podGpuCount, caseDetails.podPhase, caseDetails.workloadType) + caseDetails := caseDetails + It(caseBaseName, func() { + By("creating the pod") + pod := createDedicatedGpuPod(caseDetails.podGpuCount, 
caseDetails.podPhase, caseDetails.podConditions) + _, err := kubeclient.CoreV1().Pods(podNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + podGroup := createPodGroup(caseDetails.workloadType) + _, err = dynamicClient.Resource(schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v1", Resource: "podgroups"}).Namespace(podNamespace).Create(context.TODO(), podGroup, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + expectedTopology := createTopology(nodeGpuCount, node) + isPodScheduledConditionTrue := isConditionTrue(caseDetails.podConditions, v1.PodScheduled) + + if caseDetails.podPhase == v1.PodRunning || + ((caseDetails.podPhase == v1.PodPending || caseDetails.podPhase == v1.PodUnknown) && isPodScheduledConditionTrue) { + for i := 0; i < int(caseDetails.podGpuCount); i++ { + expectedTopology.Gpus[i].Status.PodGpuUsageStatus = topology.PodGpuUsageStatusMap{ + podUID: topology.GpuUsageStatus{ + Utilization: getExpectedUtilization(caseDetails.workloadType, caseDetails.podPhase), + FbUsed: expectedTopology.GpuMemory, + UseKnativeUtilization: caseDetails.workloadType == "inference" && caseDetails.podPhase == v1.PodRunning, + }, + } + expectedTopology.Gpus[i].Status.AllocatedBy.Pod = podName + expectedTopology.Gpus[i].Status.AllocatedBy.Container = containerName + expectedTopology.Gpus[i].Status.AllocatedBy.Namespace = podNamespace } - expectedTopology.Gpus[i].Status.AllocatedBy.Pod = podName - expectedTopology.Gpus[i].Status.AllocatedBy.Container = containerName - expectedTopology.Gpus[i].Status.AllocatedBy.Namespace = podNamespace } - } - Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) + Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) - By("deleting the pod") - err = kubeclient.CoreV1().Pods(podNamespace).Delete(context.TODO(), podName, metav1.DeleteOptions{}) - Expect(err).ToNot(HaveOccurred()) - Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(createTopology(nodeGpuCount, node))) - }) - } - }) + By("deleting the pod") + err = kubeclient.CoreV1().Pods(podNamespace).Delete(context.TODO(), podName, metav1.DeleteOptions{}) + Expect(err).ToNot(HaveOccurred()) + Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(createTopology(nodeGpuCount, node))) + }) + } + }) - Context("update", func() { - When("pod phase changes", func() { - BeforeEach(func() { - // Create a pod in the pending phase with scheduled condition true - pod := createDedicatedGpuPod(1, v1.PodPending, []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionTrue}}) - _, err := kubeclient.CoreV1().Pods(podNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) - workloadType := "train" - podGroup := createPodGroup(workloadType) - _, err = dynamicClient.Resource(schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v1", Resource: "podgroups"}).Namespace(podNamespace).Create(context.TODO(), podGroup, metav1.CreateOptions{}) - Expect(err).ToNot(HaveOccurred()) + Context("update", func() { + When("pod phase changes", func() { + BeforeEach(func() { + // Create a pod in the pending phase with scheduled condition true + pod := createDedicatedGpuPod(1, v1.PodPending, []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionTrue}}) + _, err := kubeclient.CoreV1().Pods(podNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + workloadType := "train" + 
podGroup := createPodGroup(workloadType) + _, err = dynamicClient.Resource(schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v1", Resource: "podgroups"}).Namespace(podNamespace).Create(context.TODO(), podGroup, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + expectedTopology := createTopology(nodeGpuCount, node) + expectedTopology.Gpus[0].Status.PodGpuUsageStatus = topology.PodGpuUsageStatusMap{ + podUID: topology.GpuUsageStatus{ + Utilization: getExpectedUtilization(workloadType, v1.PodPending), + FbUsed: expectedTopology.GpuMemory, + UseKnativeUtilization: false, + }, + } + + By("creating the pod with pending phase") + Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) + + By("updating the pod phase to running") + pod, err = kubeclient.CoreV1().Pods(podNamespace).Get(context.TODO(), podName, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + pod.Status.Phase = v1.PodRunning + _, err = kubeclient.CoreV1().Pods(podNamespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) - expectedTopology := createTopology(nodeGpuCount, node) - expectedTopology.Gpus[0].Status.PodGpuUsageStatus = topology.PodGpuUsageStatusMap{ - podUID: topology.GpuUsageStatus{ - Utilization: getExpectedUtilization(workloadType, v1.PodPending), + expectedTopology.Gpus[0].Status.PodGpuUsageStatus[podUID] = topology.GpuUsageStatus{ + Utilization: getExpectedUtilization(workloadType, v1.PodRunning), FbUsed: expectedTopology.GpuMemory, UseKnativeUtilization: false, - }, - } + } - By("creating the pod with pending phase") - Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) + Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology)) + }) + }) + }) + }) - By("updating the pod phase to running") - pod, err = kubeclient.CoreV1().Pods(podNamespace).Get(context.TODO(), podName, metav1.GetOptions{}) + Context("reservation pod", func() { + When("GPU Index annotation is set (pre 2.17)", func() { + It("should not reset the GPU Index annotation", func() { + gpuIdx := 0 + + reservationPod := createGpuIdxReservationPod(ptr.To(gpuIdx)) + _, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Create(context.TODO(), reservationPod, metav1.CreateOptions{}) Expect(err).ToNot(HaveOccurred()) - pod.Status.Phase = v1.PodRunning - _, err = kubeclient.CoreV1().Pods(podNamespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) + + // Expect the pod to consistently contain the GPU Index annotation + Consistently(func() (string, error) { + pod, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Get(context.TODO(), reservationPodName, metav1.GetOptions{}) + if err != nil { + return "", err + } + return pod.Annotations[constants.ReservationPodGpuIdxAnnotation], nil + }).Should(Equal(strconv.Itoa(gpuIdx))) + }) + }) + + When("Gpu Index annotation is not set (post 2.17)", func() { + It("should set the GPU Index annotation with the GPU UUID", func() { + reservationPod := createGpuIdxReservationPod(nil) + _, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Create(context.TODO(), reservationPod, metav1.CreateOptions{}) Expect(err).ToNot(HaveOccurred()) - expectedTopology.Gpus[0].Status.PodGpuUsageStatus[podUID] = topology.GpuUsageStatus{ - Utilization: getExpectedUtilization(workloadType, v1.PodRunning), - FbUsed: expectedTopology.GpuMemory, - UseKnativeUtilization: false, - } + Eventually(func() (bool, error) { + pod, err := 
+					pod, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Get(context.TODO(), reservationPodName, metav1.GetOptions{})
+					if err != nil {
+						return false, err
+					}
+
+					nodeTopology, err := getTopologyNodeFromKube(kubeclient, node)()
+					if err != nil || nodeTopology == nil {
+						return false, err
+					}
-				Eventually(getTopologyNodeFromKube(kubeclient, node)).Should(Equal(expectedTopology))
+					for _, gpuDetails := range nodeTopology.Gpus {
+						if gpuDetails.ID == pod.Annotations[constants.ReservationPodGpuIdxAnnotation] {
+							return true, nil
+						}
+					}
+
+					return false, nil
+				}).Should(BeTrue())
 			})
 		})
 	})
@@ -287,7 +339,7 @@ var _ = Describe("StatusUpdater", func() {
 			gpuIdx := 0
 
 			// Test reservation pod handling
-			reservationPod := createGpuIdxReservationPod(gpuIdx)
+			reservationPod := createGpuIdxReservationPod(ptr.To(gpuIdx))
 			_, err := kubeclient.CoreV1().Pods(constants.ReservationNs).Create(context.TODO(), reservationPod, metav1.CreateOptions{})
 			Expect(err).ToNot(HaveOccurred())
@@ -548,9 +600,12 @@ func createBaseSharedGpuPod(gpuFraction float64) *v1.Pod {
 	}
 }
 
-func createGpuIdxReservationPod(gpuIdx int) *v1.Pod {
+func createGpuIdxReservationPod(gpuIdx *int) *v1.Pod {
 	pod := createBaseReservationPod()
-	pod.Annotations[constants.ReservationPodGpuIdxAnnotation] = strconv.Itoa(gpuIdx)
+
+	if gpuIdx != nil {
+		pod.Annotations[constants.ReservationPodGpuIdxAnnotation] = strconv.Itoa(*gpuIdx)
+	}
 
 	return pod
 }
From c0bcc2ecb0c0d580476a8109f81df38b23c190d7 Mon Sep 17 00:00:00 2001
From: Guy Shaibi
Date: Sun, 31 Mar 2024 13:41:08 +0300
Subject: [PATCH 5/7] Update k8s.io/utils to v0.0.0-20240310230437-4693a0247e57

---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 8efc487..31ad8ae 100644
--- a/go.mod
+++ b/go.mod
@@ -98,7 +98,7 @@ require (
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	k8s.io/klog/v2 v2.80.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect
-	k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 // indirect
+	k8s.io/utils v0.0.0-20240310230437-4693a0247e57
 	sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
 	sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
 	sigs.k8s.io/yaml v1.3.0 // indirect
diff --git a/go.sum b/go.sum
index 9d9b7c5..7d7c088 100644
--- a/go.sum
+++ b/go.sum
@@ -900,8 +900,8 @@ k8s.io/kubelet v0.24.0 h1:fH+D6mSr4DGIeHp/O2+mCEJhkVq3Gpgv9BVOHI+GrWY=
 k8s.io/kubelet v0.24.0/go.mod h1:p3BBacmHTCMpUf+nluhlyzuGHmONKAspqCvpu9oPAyA=
 k8s.io/utils v0.0.0-20210802155522-efc7438f0176/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA=
 k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA=
-k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 h1:KTgPnR10d5zhztWptI952TNtt/4u5h3IzDXkdIMuo2Y=
-k8s.io/utils v0.0.0-20221128185143-99ec85e7a448/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+k8s.io/utils v0.0.0-20240310230437-4693a0247e57 h1:gbqbevonBh57eILzModw6mrkbwM0gQBEuevE/AaBsHY=
+k8s.io/utils v0.0.0-20240310230437-4693a0247e57/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
 rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
 rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
 rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
From 03d580c9c9b6a90f296c92b0044a8a2967eb1540 Mon Sep 17 00:00:00 2001
From: Guy Shaibi
Date: Sun, 31 Mar 2024 13:42:21 +0300
Subject: [PATCH 6/7] Update IsPodScheduled comment

---
 internal/status-updater/util/util.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/status-updater/util/util.go b/internal/status-updater/util/util.go
index 1f0c618..5833db7 100644
--- a/internal/status-updater/util/util.go
+++ b/internal/status-updater/util/util.go
@@ -26,7 +26,7 @@ func IsPodTerminated(pod *v1.Pod) bool {
 }
 
 func IsPodScheduled(pod *v1.Pod) bool {
-	// This should be checked using the pod's PodScheduled condition once https://github.com/run-ai/runai-engine/pull/174 is merged and available.
+	// This may be checked using the pod's PodScheduled condition once https://github.com/run-ai/runai-engine/pull/174 is merged and available.
 	return pod.Spec.NodeName != ""
 }
From 7b1e8e6d2c026dfff264d4c480660ed842faa6c3 Mon Sep 17 00:00:00 2001
From: Guy Shaibi
Date: Sun, 31 Mar 2024 13:43:05 +0300
Subject: [PATCH 7/7] Refactor setReservationPodGpuIdxAnnotation to setReservationPodGpuIdxAnnotationIfNeeded

---
 .../handlers/pod/gpu_reservation_pod_handler.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go b/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go
index 2207192..47b3897 100644
--- a/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go
+++ b/internal/status-updater/handlers/pod/gpu_reservation_pod_handler.go
@@ -18,7 +18,7 @@ func (p *PodHandler) handleGpuReservationPodAddition(pod *v1.Pod, nodeTopology *
 		return nil
 	}
 
-	err := p.setReservationPodGpuIdxAnnotation(pod, nodeTopology)
+	err := p.setReservationPodGpuIdxAnnotationIfNeeded(pod, nodeTopology)
 	if err != nil {
 		return fmt.Errorf("failed to set GPU index annotation for reservation pod %s: %w", pod.Name, err)
 	}
@@ -26,7 +26,7 @@ func (p *PodHandler) handleGpuReservationPodAddition(pod *v1.Pod, nodeTopology *
 	return nil
 }
 
-func (p *PodHandler) setReservationPodGpuIdxAnnotation(pod *v1.Pod, nodeTopology *topology.NodeTopology) error {
+func (p *PodHandler) setReservationPodGpuIdxAnnotationIfNeeded(pod *v1.Pod, nodeTopology *topology.NodeTopology) error {
 	// DEPRECATED: Prior to 2.17, the scheduler had set the GPU index annotation for the reservation pod,
 	// therefore we skip setting the annotation if it already exists to support backward compatibility.
 	if _, ok := pod.Annotations[constants.ReservationPodGpuIdxAnnotation]; ok {