@@ -199,9 +199,6 @@ def cleanup(self, container_id: str, log_handler: LogHandler) -> None:
199199 self ._delete_services (log_handler )
200200 self ._delete_job (log_handler )
201201
202- self ._wait_for_pod_to_be_deleted (container_id , log_handler )
203- log_handler .write (f"Job { container_id } is cleaned up." , flush = True )
204-
205202 def update_logs (self , container_id : str , log_handler : LogHandler ) -> None :
206203 try :
207204 pod = self ._get_job_pod (container_id )
@@ -460,7 +457,7 @@ def _create_job(
460457 job_result = self .client .batch_api .create_namespaced_job (
461458 namespace = self .namespace , body = job_spec
462459 )
463- LOGGER .info (f"Submitted Job template: { self .job_name } , " )
460+ LOGGER .info (f"Submitted Job template: { self .job_name } " )
464461 except ApiException as e :
465462 LOGGER .error (f"API Exception { e } " )
466463 raise ContainerStartError (str (e )) from e
@@ -485,11 +482,44 @@ def _delete_job(self, log_handler: LogHandler) -> None:
485482
486483 if result .status == "Failure" :
487484 raise ContainerCleanupError (f"{ result } " )
485+
486+ watcher = watch .Watch ()
487+ try :
488+ for event in watcher .stream (
489+ self .client .core_api .list_namespaced_pod ,
490+ namespace = self .namespace ,
491+ label_selector = f"job-name={ self .job_name } " ,
492+ timeout_seconds = POD_DELETE_TIMEOUT ,
493+ ):
494+ if event ["type" ] == "DELETED" :
495+ log_handler .write (
496+ f"Pod '{ self .job_name } ' is deleted." ,
497+ flush = True ,
498+ )
499+ break
500+
501+ log_handler .write (
502+ f"Job { self .job_name } is cleaned up." ,
503+ flush = True ,
504+ )
505+ except ApiException as e :
506+ if e .status == status .HTTP_404_NOT_FOUND :
507+ message = (
508+ f"Pod '{ self .job_name } ' not found (404), "
509+ "assuming it's already deleted."
510+ )
511+ log_handler .write (message , flush = True )
512+ return
513+ log_handler .write (
514+ f"Error while waiting for deletion: { e } " , flush = True
515+ )
516+ raise ContainerCleanupError (
517+ f"Error during cleanup: { str (e )} "
518+ ) from e
519+ finally :
520+ watcher .stop ()
488521 else :
489522 LOGGER .info (f"Job for { self .job_name } has been removed" )
490- log_handler .write (
491- f"Job for { self .job_name } has been removed." , True
492- )
493523
494524 except ApiException as e :
495525 raise ContainerCleanupError (
@@ -532,50 +562,6 @@ def _wait_for_pod_to_start(self, log_handler: LogHandler) -> None:
532562 finally :
533563 watcher .stop ()
534564
535- def _wait_for_pod_to_be_deleted (
536- self , pod_name : str , log_handler : LogHandler
537- ) -> None :
538- log_handler .write (
539- f"Waiting for pod '{ pod_name } ' to be deleted..." ,
540- flush = True ,
541- )
542- watcher = watch .Watch ()
543- try :
544- for event in watcher .stream (
545- self .client .core_api .list_namespaced_pod ,
546- namespace = self .namespace ,
547- label_selector = f"job-name={ self .job_name } " ,
548- timeout_seconds = POD_DELETE_TIMEOUT ,
549- ):
550- LOGGER .debug (f"Received event: { event } " )
551- pod_name = event ["object" ].metadata .name
552- pod_phase = event ["object" ].status .phase
553- LOGGER .debug (f"Pod { pod_name } - { pod_phase } " )
554-
555- if event ["type" ] == "DELETED" :
556- # Pod successfully deleted
557- log_handler .write (
558- f"Pod '{ pod_name } ' has been deleted." , flush = True
559- )
560- break
561- except ApiException as e :
562- if e .status == status .HTTP_404_NOT_FOUND :
563- message = (
564- f"Pod '{ pod_name } ' not found (404), "
565- "assuming it's already deleted."
566- )
567- log_handler .write (message , flush = True )
568- return
569- log_handler .write (
570- f"Error while waiting for deletion: { e } " , flush = True
571- )
572- raise ContainerCleanupError (
573- f"Error during cleanup: { str (e )} "
574- ) from e
575-
576- finally :
577- watcher .stop ()
578-
579565 def _set_namespace (self ) -> None :
580566 namespace_file = (
581567 "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
0 commit comments