From 710b4b9f04fb2963beb8b33308b534ef6e84bacc Mon Sep 17 00:00:00 2001 From: d-hayashi Date: Tue, 27 Aug 2024 10:16:08 +0900 Subject: [PATCH] fix: delete slurm jobs if pod's state is succeeded or failed --- internal/watcher/watcher.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 537022b..2994394 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -169,6 +169,10 @@ func (w *watcher) fetchJobStateOnKubernetes() error { } for _, pod := range pods.Items { + // Filter out pods with state `Succeeded` or `Failed` + if pod.Status.Phase == "Succeeded" || pod.Status.Phase == "Failed" { + continue + } annotations := pod.GetAnnotations() if UUID, uuidExists := annotations["k8s-slurm-injector/uuid"]; uuidExists { if jobId, jobIdExists := annotations["k8s-slurm-injector/jobid"]; jobIdExists { @@ -201,6 +205,10 @@ func (w *watcher) fetchJobStateOnKubernetes() error { if UUID, uuidExists := annotations["k8s-slurm-injector/uuid"]; uuidExists { podExists := false for _, pod := range pods.Items { + // Filter out pods with state `Succeeded` or `Failed` + if pod.Status.Phase == "Succeeded" || pod.Status.Phase == "Failed" { + continue + } if podUUID, podUUIDExists := pod.GetAnnotations()["k8s-slurm-injector/uuid"]; podUUIDExists { if podUUID == UUID { podExists = true