From dd3f863cc1250a9f67c635db704eade7bce716de Mon Sep 17 00:00:00 2001 From: Farley Date: Tue, 3 Aug 2021 19:06:54 +1200 Subject: [PATCH 1/7] adding notes and more variables into kubernetes example --- examples/kubernetes.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/kubernetes.py b/examples/kubernetes.py index be7a58e1c6..7d1f1cce34 100644 --- a/examples/kubernetes.py +++ b/examples/kubernetes.py @@ -44,9 +44,11 @@ class PerlPi(KubernetesJobTask): - name = "pi" - max_retrials = 3 - spec_schema = { + name = "pi" # The name (prefix) of the job that will be created for identification purposes + backoff_limit = 2 # This is the number of retries incase there is a pod/node/code failure + kubernetes_namespace = "farley" # This is the kubernetes namespace you wish to run in + print_pod_logs_on_exit = True # Set this to true if you wish to see the logs of this + spec_schema = { # This is the standard "spec" of the containers in this job, this is a good sane example "containers": [{ "name": "pi", "image": "perl", From 1aa77becf250fd2ee9683e0e902cf78719711773 Mon Sep 17 00:00:00 2001 From: Farley Date: Tue, 3 Aug 2021 19:39:37 +1200 Subject: [PATCH 2/7] initial poc with official kubernetes library --- examples/kubernetes.py | 23 +-- luigi/contrib/kubernetes.py | 243 ++++++++++++++------------------ test/contrib/kubernetes_test.py | 36 ++--- tox.ini | 1 + 4 files changed, 138 insertions(+), 165 deletions(-) diff --git a/examples/kubernetes.py b/examples/kubernetes.py index 7d1f1cce34..83795f1a7f 100644 --- a/examples/kubernetes.py +++ b/examples/kubernetes.py @@ -19,22 +19,25 @@ Requires: -- pykube: ``pip install pykube-ng`` -- A local minikube custer up and running: http://kubernetes.io/docs/getting-started-guides/minikube/ - -**WARNING**: For Python versions < 3.5 the kubeconfig file must point to a Kubernetes API -hostname, and NOT to an IP address. +- Official kubernetes-client python library: ``pip install kubernetes`` + - See: https://github.com/kubernetes-client/python/ +- A kubectl configuration that is active and functional + - Can be your kubectl config setup for EKS with `aws eks update-kubeconfig --name clustername` + - Or access to any other hosted/managed/self-setup Kubernetes cluster + - For devs, a local minikube cluster up and running: http://kubernetes.io/docs/getting-started-guides/minikube/ + - Or for devs Docker Desktop has support for minikube: https://www.docker.com/products/docker-desktop You can run this code example like this: .. code:: console - $ luigi --module examples.kubernetes_job PerlPi --local-scheduler + $ PYTHONPATH=. luigi --module examples.kubernetes_job PerlPi --local-scheduler + # Or alternatively... + $ python -m luigi --module examples.kubernetes PerlPi --local-scheduler Running this code will create a pi-luigi-uuid kubernetes job within the cluster -pointed to by the default context in "~/.kube/config". - -If running within a kubernetes cluster, set auth_method = "service-account" to -access the local cluster. +pointed to by the default context in "~/.kube/config". It will also auto-detect if this +is running in a Kubernetes cluster and has functional access to the local cluster's APIs +via a ClusterRole.
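For reference, the auto-detection described above boils down to the config-loading cascade
below. This is a simplified sketch of what luigi/contrib/kubernetes.py in this change does
when a task starts; the real code's error handling differs slightly:

.. code:: python

    import kubernetes as kubernetes_api

    # Prefer the in-cluster service-account configuration, which works when this
    # code runs inside a pod that has a suitable Role/ClusterRole binding...
    try:
        kubernetes_api.config.load_incluster_config()
    except Exception:
        # ...otherwise fall back to the local kubeconfig, which honours the
        # KUBECONFIG environment variable and the current kubectl context.
        kubernetes_api.config.load_kube_config()

    # Jobs themselves are managed through the batch API.
    batch_api = kubernetes_api.client.BatchV1Api()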
""" # import os diff --git a/luigi/contrib/kubernetes.py b/luigi/contrib/kubernetes.py index 82dc769580..6dc5c8c84b 100644 --- a/luigi/contrib/kubernetes.py +++ b/luigi/contrib/kubernetes.py @@ -28,9 +28,11 @@ Requires: -- pykube: ``pip install pykube-ng`` +- Official kubernetes-client python library: ``pip install kubernetes`` + - See: https://github.com/kubernetes-client/python/ -Written and maintained by Marco Capuccini (@mcapuccini). +Written and maintained by Marco Capuccini (@mcapuccini) +Pivoted to official kubernetes-client python module by Farley (@AndrewFarley) """ import logging import time @@ -42,74 +44,46 @@ logger = logging.getLogger('luigi-interface') try: - from pykube.config import KubeConfig - from pykube.http import HTTPClient - from pykube.objects import Job, Pod -except ImportError: - logger.warning('pykube is not installed. KubernetesJobTask requires pykube.') + import kubernetes as kubernetes_api +except ImportError as i: + logger.warning("WARNING: kubernetes is not installed. KubernetesJobTask requires kubernetes") + logger.warning(" Please run 'pip install kubernetes' and try again") class kubernetes(luigi.Config): - auth_method = luigi.Parameter( - default="kubeconfig", - description="Authorization method to access the cluster") - kubeconfig_path = luigi.Parameter( - default="~/.kube/config", - description="Path to kubeconfig file for cluster authentication") - max_retrials = luigi.IntParameter( - default=0, - description="Max retrials in event of job failure") kubernetes_namespace = luigi.OptionalParameter( default=None, description="K8s namespace in which the job will run") - class KubernetesJobTask(luigi.Task): __DEFAULT_POLL_INTERVAL = 5 # see __track_job __DEFAULT_POD_CREATION_INTERVAL = 5 - _kubernetes_config = None # Needs to be loaded at runtime def _init_kubernetes(self): self.__logger = logger - self.__logger.debug("Kubernetes auth method: " + self.auth_method) - if self.auth_method == "kubeconfig": - self.__kube_api = HTTPClient(KubeConfig.from_file(self.kubeconfig_path)) - elif self.auth_method == "service-account": - self.__kube_api = HTTPClient(KubeConfig.from_service_account()) - else: - raise ValueError("Illegal auth_method") + self.__kubernetes_core_api = kubernetes_api.client.CoreV1Api() + # Configs can be set in Configuration class directly or using helper utility, by default lets try to load in-cluster config + # TODO: Make library support forcing one of these instead of automatic cascading logic...? + try: + kubernetes_api.config.load_incluster_config() + except Exception as e: + try: + kubernetes_api.config.load_kube_config() + except Exception as ex: + raise ex + + # Create our API instances for Kubernetes + self.__kubernetes_api_instance = kubernetes_api.client.CoreV1Api() + self.__kubernetes_batch_instance = kubernetes_api.client.BatchV1Api() + self.job_uuid = str(uuid.uuid4().hex) now = datetime.utcnow() - self.uu_name = "%s-%s-%s" % (self.name, now.strftime('%Y%m%d%H%M%S'), self.job_uuid[:16]) - - @property - def auth_method(self): - """ - This can be set to ``kubeconfig`` or ``service-account``. - It defaults to ``kubeconfig``. 
- - For more details, please refer to: - - - kubeconfig: http://kubernetes.io/docs/user-guide/kubeconfig-file - - service-account: http://kubernetes.io/docs/user-guide/service-accounts - """ - return self.kubernetes_config.auth_method + + # Set a namespace if not specified because we run jobs in a specific namespace, always + if self.kubernetes_namespace is None: + self.kubernetes_namespace = "default" - @property - def kubeconfig_path(self): - """ - Path to kubeconfig file used for cluster authentication. - It defaults to "~/.kube/config", which is the default location - when using minikube (http://kubernetes.io/docs/getting-started-guides/minikube). - When auth_method is ``service-account`` this property is ignored. - - **WARNING**: For Python versions < 3.5 kubeconfig must point to a Kubernetes API - hostname, and NOT to an IP address. - - For more details, please refer to: - http://kubernetes.io/docs/user-guide/kubeconfig-file - """ - return self.kubernetes_config.kubeconfig_path + self.uu_name = "%s-%s-%s" % (self.name, now.strftime('%Y%m%d%H%M%S'), self.job_uuid[:16]) @property def kubernetes_namespace(self): @@ -120,7 +94,7 @@ def kubernetes_namespace(self): For more details, please refer to: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ """ - return self.kubernetes_config.kubernetes_namespace + return "default" @property def name(self): @@ -166,17 +140,10 @@ def spec_schema(self): """ raise NotImplementedError("subclass must define spec_schema") - @property - def max_retrials(self): - """ - Maximum number of retrials in case of failure. - """ - return self.kubernetes_config.max_retrials - @property def backoff_limit(self): """ - Maximum number of retries before considering the job as failed. + Maximum number of retries before considering the job as failed. 
6 times is the Kubernetes default See: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#pod-backoff-failure-policy """ return 6 @@ -203,12 +170,6 @@ def active_deadline_seconds(self): """ return None - @property - def kubernetes_config(self): - if not self._kubernetes_config: - self._kubernetes_config = kubernetes() - return self._kubernetes_config - @property def poll_interval(self): """How often to poll Kubernetes for job status, in seconds.""" @@ -250,30 +211,30 @@ def signal_complete(self): pass def __get_pods(self): - pod_objs = Pod.objects(self.__kube_api, namespace=self.kubernetes_namespace) \ - .filter(selector="job-name=" + self.uu_name) \ - .response['items'] - return [Pod(self.__kube_api, p) for p in pod_objs] + api_response = self.__kubernetes_api_instance.list_namespaced_pod(self.kubernetes_namespace, limit=10, label_selector="job-name=" + self.uu_name) + return api_response.items def __get_job(self): - jobs = Job.objects(self.__kube_api, namespace=self.kubernetes_namespace) \ - .filter(selector="luigi_task_id=" + self.job_uuid) \ - .response['items'] - assert len(jobs) == 1, "Kubernetes job " + self.uu_name + " not found" - return Job(self.__kube_api, jobs[0]) + api_response = self.__kubernetes_batch_instance.list_namespaced_job(self.kubernetes_namespace, limit=10, label_selector="luigi_task_id=" + self.job_uuid) + assert len(api_response.items) == 1, "Kubernetes job " + self.uu_name + " not found" + return api_response.items[0] def __print_pod_logs(self): for pod in self.__get_pods(): - logs = pod.logs(timestamps=True).strip() - self.__logger.info("Fetching logs from " + pod.name) - if len(logs) > 0: - for line in logs.split('\n'): - self.__logger.info(line) + self.__logger.info("Fetching logs from " + pod.metadata.name) + try: + api_response = self.__kubernetes_api_instance.read_namespaced_pod_log(pod.metadata.name, self.kubernetes_namespace, timestamps=True) + if len(api_response) > 0: + for line in api_response.split('\n'): + self.__logger.info(line) + except Exception as e: + logger.warning("WARNING: Unable to get logs because...") + logger.warning(e) def __print_kubectl_hints(self): self.__logger.info("To stream Pod logs, use:") for pod in self.__get_pods(): - self.__logger.info("`kubectl logs -f pod/%s -n %s`" % (pod.name, pod.namespace)) + self.__logger.info("`kubectl logs -f pod/%s -n %s`" % (pod.metadata.name, pod.metadata.namespace)) def __verify_job_has_started(self): """Asserts that the job has successfully started""" @@ -291,101 +252,109 @@ def __verify_job_has_started(self): assert len(pods) > 0, "No pod scheduled by " + self.uu_name for pod in pods: - status = pod.obj['status'] - for cont_stats in status.get('containerStatuses', []): - if 'terminated' in cont_stats['state']: - t = cont_stats['state']['terminated'] + for cont_stats in pod.status.container_statuses: + if cont_stats.state.terminated is not None: + t = cont_stats.state.terminated err_msg = "Pod %s %s (exit code %d). Logs: `kubectl logs pod/%s`" % ( pod.name, t['reason'], t['exitCode'], pod.name) assert t['exitCode'] == 0, err_msg - if 'waiting' in cont_stats['state']: - wr = cont_stats['state']['waiting']['reason'] + if cont_stats.state.waiting is not None: + wr = cont_stats.state.waiting.reason assert wr == 'ContainerCreating', "Pod %s %s. 
Logs: `kubectl logs pod/%s`" % ( pod.name, wr, pod.name) - for cond in status.get('conditions', []): - if 'message' in cond: - if cond['reason'] == 'ContainersNotReady': + for cond in pod.status.conditions: + if cond.message is not None: + if cond.message == 'ContainersNotReady': + return False + if cond.status != 'False': + self.__logger.warning("[ERROR] %s - %s" % (cond.reason, cond.message)) return False - assert cond['status'] != 'False', \ - "[ERROR] %s - %s" % (cond['reason'], cond['message']) return True + def __scale_down_job(self, job_name): + api_response = self.__kubernetes_batch_instance.patch_namespaced_job( + job_name, self.kubernetes_namespace, {"spec": {"parallelism": 0, "backoff_limit": 0}}) + if api_response.spec.parallelism == 0: + return True + return False + def __get_job_status(self): """Return the Kubernetes job status""" # Figure out status and return it job = self.__get_job() - if "succeeded" in job.obj["status"] and job.obj["status"]["succeeded"] > 0: - job.scale(replicas=0) + if job.status.succeeded is not None: + self.__scale_down_job(job.metadata.name) if self.print_pod_logs_on_exit: self.__print_pod_logs() if self.delete_on_success: self.__delete_job_cascade(job) return "SUCCEEDED" - if "failed" in job.obj["status"]: - failed_cnt = job.obj["status"]["failed"] + if job.status.failed is not None: + failed_cnt = job.status.failed self.__logger.debug("Kubernetes job " + self.uu_name + " status.failed: " + str(failed_cnt)) if self.print_pod_logs_on_exit: self.__print_pod_logs() - if failed_cnt > self.max_retrials: - job.scale(replicas=0) # avoid more retrials + if failed_cnt >= self.backoff_limit: + self.__scale_down_job(job.metadata.name) return "FAILED" return "RUNNING" def __delete_job_cascade(self, job): - delete_options_cascade = { - "kind": "DeleteOptions", - "apiVersion": "v1", - "propagationPolicy": "Background" - } - r = self.__kube_api.delete(json=delete_options_cascade, **job.api_kwargs()) - if r.status_code != 200: - self.__kube_api.raise_for_status(r) + api_response = self.__kubernetes_batch_instance.delete_namespaced_job(job.metadata.name, self.kubernetes_namespace, body={"grace_period_seconds": 0, "propagation_policy": "Background"}) + # TODO: Check status of this request...? 
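        # NOTE (untested sketch): one way to address the TODO above is to pass a typed
        # V1DeleteOptions body and inspect the returned V1Status instead of printing it, e.g.
        #     body = kubernetes_api.client.V1DeleteOptions(propagation_policy="Background",
        #                                                  grace_period_seconds=0)
        #     status = self.__kubernetes_batch_instance.delete_namespaced_job(
        #         job.metadata.name, self.kubernetes_namespace, body=body)
        #     # status.status / status.reason then carry the outcome of the delete call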
+ print(api_response.status) def run(self): self._init_kubernetes() - # Render job - job_json = { - "apiVersion": "batch/v1", - "kind": "Job", - "metadata": { - "name": self.uu_name, - "labels": { - "spawned_by": "luigi", - "luigi_task_id": self.job_uuid - } - }, - "spec": { - "backoffLimit": self.backoff_limit, - "template": { - "metadata": { - "name": self.uu_name, - "labels": {} - }, - "spec": self.spec_schema - } + + job_metadata = { + "name": self.uu_name, + "labels": { + "spawned_by": "luigi", + "luigi_task_id": self.job_uuid, + "luigi_name": self.uu_name, + } + } + + job_spec = { + "backoffLimit": self.backoff_limit, + "template": { + "metadata": { + "name": self.uu_name, + "labels": {} + }, + "spec": self.spec_schema } } + if self.kubernetes_namespace is not None: - job_json['metadata']['namespace'] = self.kubernetes_namespace + job_metadata['namespace'] = self.kubernetes_namespace if self.active_deadline_seconds is not None: - job_json['spec']['activeDeadlineSeconds'] = \ - self.active_deadline_seconds + job_spec['activeDeadlineSeconds'] = self.active_deadline_seconds + # Update user labels - job_json['metadata']['labels'].update(self.labels) - job_json['spec']['template']['metadata']['labels'].update(self.labels) + job_metadata['labels'].update(self.labels) + job_spec['template']['metadata']['labels'].update(self.labels) # Add default restartPolicy if not specified if "restartPolicy" not in self.spec_schema: - job_json["spec"]["template"]["spec"]["restartPolicy"] = "Never" + job_spec["template"]["spec"]["restartPolicy"] = "Never" # Submit job self.__logger.info("Submitting Kubernetes Job: " + self.uu_name) - job = Job(self.__kube_api, job_json) - job.create() + body = kubernetes_api.client.V1Job(metadata=job_metadata, spec=job_spec) + + try: + api_response = self.__kubernetes_batch_instance.create_namespaced_job(self.kubernetes_namespace, body) + self.__logger.info("Successfully Created Kubernetes Job uid: " + api_response.metadata.uid) + except kubernetes_api.client.rest.ApiException as e: + print("Exception when calling BatchV1Api->create_namespaced_job: %s\n" % e) + + # Track the Job (wait while active) self.__logger.info("Start tracking Kubernetes Job: " + self.uu_name) self.__track_job() diff --git a/test/contrib/kubernetes_test.py b/test/contrib/kubernetes_test.py index 246ca777d6..650df26c40 100644 --- a/test/contrib/kubernetes_test.py +++ b/test/contrib/kubernetes_test.py @@ -21,11 +21,13 @@ Requires: -- pykube: ``pip install pykube-ng`` -- A local minikube custer up and running: http://kubernetes.io/docs/getting-started-guides/minikube/ - -**WARNING**: For Python versions < 3.5 the kubeconfig file must point to a Kubernetes API -hostname, and NOT to an IP address. +- Official kubernetes-client python library: ``pip install kubernetes`` + - See: https://github.com/kubernetes-client/python/ +- A kubectl configuration that is active and functional + - Can be your kubectl config setup for EKS with `aws eks update-kubeconfig --name clustername` + - Or access to any other hosted/managed/self-setup Kubernetes cluster + - For devs, a local minikube cluster up and running: http://kubernetes.io/docs/getting-started-guides/minikube/ + - Or for devs Docker Desktop has support for minikube: https://www.docker.com/products/docker-desktop Written and maintained by Marco Capuccini (@mcapuccini).
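Assuming the cluster behind your current kubectl context is safe to create Jobs in, the tests
in this module can be run roughly as follows (the ``contrib`` marker matches the
``@pytest.mark.contrib`` decorator used below; the exact invocation may differ in your checkout):

.. code:: console

    $ pip install kubernetes pytest
    $ pytest -m contrib test/contrib/kubernetes_test.py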
""" @@ -41,9 +43,7 @@ logger = logging.getLogger('luigi-interface') try: - from pykube.config import KubeConfig - from pykube.http import HTTPClient - from pykube.objects import Job + import kubernetes as kubernetes_api except ImportError: raise unittest.SkipTest('pykube is not installed. This test requires pykube.') @@ -61,7 +61,6 @@ class SuccessJob(KubernetesJobTask): class FailJob(KubernetesJobTask): name = "fail" - max_retrials = 3 backoff_limit = 3 spec_schema = { "containers": [{ @@ -86,15 +85,16 @@ def test_success_job(self): def test_fail_job(self): fail = FailJob() self.assertRaises(RuntimeError, fail.run) - # Check for retrials - kube_api = HTTPClient(KubeConfig.from_file("~/.kube/config")) # assumes minikube - jobs = Job.objects(kube_api).filter(selector="luigi_task_id=" - + fail.job_uuid) - self.assertEqual(len(jobs.response["items"]), 1) - job = Job(kube_api, jobs.response["items"][0]) - self.assertTrue("failed" in job.obj["status"]) - self.assertTrue(job.obj["status"]["failed"] > fail.max_retrials) - self.assertTrue(job.obj['spec']['template']['metadata']['labels'] == fail.labels()) + # TODO: Discuss/consider what to re-implement here, possibly only keep the fail.labels check, possibly ad backoff limit and name check + # # Check for retrials + # kube_api = HTTPClient(KubeConfig.from_file("~/.kube/config")) # assumes minikube + # jobs = Job.objects(kube_api).filter(selector="luigi_task_id=" + # + fail.job_uuid) + # self.assertEqual(len(jobs.response["items"]), 1) + # job = Job(kube_api, jobs.response["items"][0]) + # self.assertTrue("failed" in job.obj["status"]) + # self.assertTrue(job.obj["status"]["failed"] > fail.max_retrials) + # self.assertTrue(job.obj['spec']['template']['metadata']['labels'] == fail.labels()) @mock.patch.object(KubernetesJobTask, "_KubernetesJobTask__get_job_status") @mock.patch.object(KubernetesJobTask, "signal_complete") diff --git a/tox.ini b/tox.ini index ff75cf6f12..f84539d329 100644 --- a/tox.ini +++ b/tox.ini @@ -28,6 +28,7 @@ deps = mock<2.0 moto>=1.3.10 HTTPretty==0.8.10 + kubernetes==17.17.0 docker>=2.1.0 boto>=2.42,<3.0 boto3>=1.11.0 From c0e31b7a5fb1f2c0ab61ce139b76ec34840a7276 Mon Sep 17 00:00:00 2001 From: Farley Date: Tue, 3 Aug 2021 19:51:21 +1200 Subject: [PATCH 3/7] adding note about library --- test/contrib/kubernetes_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/contrib/kubernetes_test.py b/test/contrib/kubernetes_test.py index 650df26c40..66f1241436 100644 --- a/test/contrib/kubernetes_test.py +++ b/test/contrib/kubernetes_test.py @@ -45,7 +45,7 @@ try: import kubernetes as kubernetes_api except ImportError: - raise unittest.SkipTest('pykube is not installed. This test requires pykube.') + raise unittest.SkipTest('kubernetes is not installed. 
This test requires kubernetes.') class SuccessJob(KubernetesJobTask): From 6f91aabd007f2dd1edeff5dd65afdca05cb5637c Mon Sep 17 00:00:00 2001 From: Farley Date: Wed, 25 Aug 2021 03:25:08 +1200 Subject: [PATCH 4/7] not using farleys name --- examples/kubernetes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kubernetes.py b/examples/kubernetes.py index 83795f1a7f..050d9481ad 100644 --- a/examples/kubernetes.py +++ b/examples/kubernetes.py @@ -49,7 +49,7 @@ class PerlPi(KubernetesJobTask): name = "pi" # The name (prefix) of the job that will be created for identification purposes backoff_limit = 2 # This is the number of retries incase there is a pod/node/code failure - kubernetes_namespace = "farley" # This is the kubernetes namespace you wish to run in + kubernetes_namespace = "my_nshere" # This is the kubernetes namespace you wish to run in print_pod_logs_on_exit = True # Set this to true if you wish to see the logs of this spec_schema = { # This is the standard "spec" of the containers in this job, this is a good sane example "containers": [{ From 7155d176513dc41bebbf692ab9439b48d5685247 Mon Sep 17 00:00:00 2001 From: Farley Date: Mon, 30 Aug 2021 17:51:25 +1200 Subject: [PATCH 5/7] cleaning up MR, still need to fix and add tests --- examples/kubernetes.py | 25 +++- luigi/contrib/kubernetes.py | 218 +++++++++++++++++++++++++------- test/contrib/kubernetes_test.py | 151 +++++++++++++++++----- 3 files changed, 308 insertions(+), 86 deletions(-) diff --git a/examples/kubernetes.py b/examples/kubernetes.py index 050d9481ad..a745910a1c 100644 --- a/examples/kubernetes.py +++ b/examples/kubernetes.py @@ -44,18 +44,31 @@ # import luigi from luigi.contrib.kubernetes import KubernetesJobTask - class PerlPi(KubernetesJobTask): name = "pi" # The name (prefix) of the job that will be created for identification purposes - backoff_limit = 2 # This is the number of retries incase there is a pod/node/code failure - kubernetes_namespace = "my_nshere" # This is the kubernetes namespace you wish to run in - print_pod_logs_on_exit = True # Set this to true if you wish to see the logs of this - spec_schema = { # This is the standard "spec" of the containers in this job, this is a good sane example + kubernetes_namespace = "default" # This is the kubernetes namespace you wish to run, if not specified it uses "default" + labels = {"job_name": "pi"} # This is to add labels in Kubernetes to help identify it, this is on top of the internal luigi labels + backoff_limit = 2 # This is the number of retries incase there is a pod/node/code failure, default 6 + poll_interval = 1 # To poll more regularly reduce this number, default 5 + print_pod_logs_on_exit = False # Set this to True if you wish to see the logs of this run after completion, False by default + print_pod_logs_during_run = True # Set this to True if you wish to see the logs of this run while it is running, False by default + delete_on_success = True # Set this to False to keep the job after it finishes successfully, for debugging purposes, True by default + spec_schema = { # This is the standard "spec" of the containers in this job, this is a good sane example with resource requests/limits "containers": [{ "name": "pi", "image": "perl", - "command": ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"] + "command": ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"], + "resources": { + "requests": { + "cpu": "50m", + "memory": "50Mi" + }, + "limits": { + "cpu": "100m", + "memory": "100Mi" + } + } }] } diff --git 
a/luigi/contrib/kubernetes.py b/luigi/contrib/kubernetes.py index 6dc5c8c84b..365c4b0f44 100644 --- a/luigi/contrib/kubernetes.py +++ b/luigi/contrib/kubernetes.py @@ -35,6 +35,7 @@ Pivoted to official kubernetes-client python module by Farley (@AndrewFarley) """ import logging +import re import time import uuid from datetime import datetime @@ -45,16 +46,10 @@ try: import kubernetes as kubernetes_api -except ImportError as i: +except ImportError: logger.warning("WARNING: kubernetes is not installed. KubernetesJobTask requires kubernetes") logger.warning(" Please run 'pip install kubernetes' and try again") - -class kubernetes(luigi.Config): - kubernetes_namespace = luigi.OptionalParameter( - default=None, - description="K8s namespace in which the job will run") - class KubernetesJobTask(luigi.Task): __DEFAULT_POLL_INTERVAL = 5 # see __track_job __DEFAULT_POD_CREATION_INTERVAL = 5 @@ -62,8 +57,9 @@ class KubernetesJobTask(luigi.Task): def _init_kubernetes(self): self.__logger = logger self.__kubernetes_core_api = kubernetes_api.client.CoreV1Api() + # Configs can be set in Configuration class directly or using helper utility, by default lets try to load in-cluster config - # TODO: Make library support forcing one of these instead of automatic cascading logic...? + # and if that fails cascade into using an kube config try: kubernetes_api.config.load_incluster_config() except Exception as e: @@ -71,16 +67,16 @@ def _init_kubernetes(self): kubernetes_api.config.load_kube_config() except Exception as ex: raise ex - + # Create our API instances for Kubernetes self.__kubernetes_api_instance = kubernetes_api.client.CoreV1Api() self.__kubernetes_batch_instance = kubernetes_api.client.BatchV1Api() - + self.job_uuid = str(uuid.uuid4().hex) now = datetime.utcnow() - + # Set a namespace if not specified because we run jobs in a specific namespace, always - if self.kubernetes_namespace is None: + if not self.kubernetes_namespace: self.kubernetes_namespace = "default" self.uu_name = "%s-%s-%s" % (self.name, now.strftime('%Y%m%d%H%M%S'), self.job_uuid[:16]) @@ -89,7 +85,21 @@ def _init_kubernetes(self): def kubernetes_namespace(self): """ Namespace in Kubernetes where the job will run. - It defaults to the default namespace in Kubernetes + It defaults to the default namespace in Kubernetes, you can override this + by setting this property in your spec. + + .. code-block:: python + + class PerlPi(KubernetesJobTask): + name = "test" + kubernetes_namespace = "inserthere" # <-- here + spec_schema = { + "containers": [{ + "name": "pi", + "image": "perl", + "command": ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"] + }] + } For more details, please refer to: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ @@ -100,9 +110,9 @@ def kubernetes_namespace(self): def name(self): """ A name for this job. This task will automatically append a UUID to the - name before to submit to Kubernetes. + name before to submit to Kubernetes. This is not optional. """ - raise NotImplementedError("subclass must define name") + raise NotImplementedError("You must define the name of this job, please override the `name` property") @property def labels(self): @@ -116,7 +126,8 @@ def labels(self): @property def spec_schema(self): """ - Kubernetes Job spec schema in JSON format, an example follows. + Kubernetes Job spec schema in JSON format, an example follows. This + is not optional. .. 
code-block:: javascript @@ -138,7 +149,7 @@ def spec_schema(self): For more informations please refer to: http://kubernetes.io/docs/user-guide/pods/multi-container/#the-spec-schema """ - raise NotImplementedError("subclass must define spec_schema") + raise NotImplementedError("You must define the `spec_schema` of an Kubernetes Job") @property def backoff_limit(self): @@ -151,35 +162,82 @@ def backoff_limit(self): @property def delete_on_success(self): """ - Delete the Kubernetes workload if the job has ended successfully. + Delete the Kubernetes workload if the job has ended successfully. True by default """ return True @property def print_pod_logs_on_exit(self): """ - Fetch and print the pod logs once the job is completed. + Fetch and print the pod logs once the job is completed. False by default + """ + return False + + @property + def print_pod_logs_during_run(self): + """ + Fetch and print the pod logs during the the job. False by default + TODO MUST IMPLEMENT THIS BEFORE MERGING... """ return False @property def active_deadline_seconds(self): """ - Time allowed to successfully schedule pods. + Time allowed to successfully schedule AND run the pods. See: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#job-termination-and-cleanup """ return None @property def poll_interval(self): - """How often to poll Kubernetes for job status, in seconds.""" + """How often to poll Kubernetes for job status, in seconds. Default of 5""" return self.__DEFAULT_POLL_INTERVAL @property def pod_creation_wait_interal(self): - """Delay for initial pod creation for just submitted job in seconds""" + """Delay for initial pod creation for just submitted job in seconds. Default of 5""" return self.__DEFAULT_POD_CREATION_INTERVAL + + def __is_scaling_in_progress(self, condition, messages): + """Parses condition and messages, returns true if cluster is currently scaling up""" + + # If we're not unschedulable then stop processing... + if condition.reason != 'Unschedulable': + return False + + # Lets check the messages + for message in messages: + # Check if our status message is about node availability + match = re.match(r'(\d)\/(\d) nodes are available.*', message) + if match: + current_nodes = int(match.group(1)) + target_nodes = int(match.group(2)) + # Only if there's non-matching nodes and it is still not being scheduled + if current_nodes <= target_nodes: + return True + + + def __has_scaling_failed(self, condition, messages): + """Parses messages from kubectl events to see if scaling up failed""" + try: + # If we're not unschedulable then stop processing... 
+ if condition.reason != 'Unschedulable': + return False + + # Check our messages for the can't scale up message + for message in messages: + # Check if our status message is about that we can't scale up, if so exit immediately + if "pod didn't trigger scale-up (it wouldn" in message: + # We return here immediately on purpose, instead of the delayed return below + return True + except: + pass + + return False + + def __track_job(self): """Poll job status while active""" while not self.__verify_job_has_started(): @@ -210,12 +268,19 @@ def signal_complete(self): """ pass + def __get_pods_events(self, pod_name): + api_response = self.__kubernetes_api_instance.list_namespaced_event(namespace=self.kubernetes_namespace, limit=10, field_selector="involvedObject.name=" + pod_name) + output_messages = [] + for item in api_response.items: + output_messages.append(item.message) + return output_messages + def __get_pods(self): - api_response = self.__kubernetes_api_instance.list_namespaced_pod(self.kubernetes_namespace, limit=10, label_selector="job-name=" + self.uu_name) + api_response = self.__kubernetes_api_instance.list_namespaced_pod(namespace=self.kubernetes_namespace, limit=10, label_selector="job-name=" + self.uu_name) return api_response.items def __get_job(self): - api_response = self.__kubernetes_batch_instance.list_namespaced_job(self.kubernetes_namespace, limit=10, label_selector="luigi_task_id=" + self.job_uuid) + api_response = self.__kubernetes_batch_instance.list_namespaced_job(namespace=self.kubernetes_namespace, limit=10, label_selector="luigi_task_id=" + self.job_uuid) assert len(api_response.items) == 1, "Kubernetes job " + self.uu_name + " not found" return api_response.items[0] @@ -241,7 +306,7 @@ def __verify_job_has_started(self): # Verify that the job started self.__get_job() - # Verify that the pod started + # Verify that the pod was created pods = self.__get_pods() if not pods: self.__logger.debug( @@ -252,25 +317,72 @@ def __verify_job_has_started(self): assert len(pods) > 0, "No pod scheduled by " + self.uu_name for pod in pods: - for cont_stats in pod.status.container_statuses: - if cont_stats.state.terminated is not None: - t = cont_stats.state.terminated - err_msg = "Pod %s %s (exit code %d). Logs: `kubectl logs pod/%s`" % ( - pod.name, t['reason'], t['exitCode'], pod.name) - assert t['exitCode'] == 0, err_msg - - if cont_stats.state.waiting is not None: - wr = cont_stats.state.waiting.reason - assert wr == 'ContainerCreating', "Pod %s %s. Logs: `kubectl logs pod/%s`" % ( - pod.name, wr, pod.name) + # Get our Kubectl events stream to get full event info about this pod (like failed scaling, or status as things progress) + pod_event_messages = self.__get_pods_events(pod.metadata.name) + + # If we've got debugging enabled then print our pod messages for debugging purposes + for message in pod_event_messages: + self.__logger.debug("POD EVENT: " + message.strip()) + + # Verify the pod status conditions (success/failure) + if pod.status.container_statuses: + for container_statuses in pod.status.container_statuses: + if container_statuses.state.terminated is not None: + err_msg = "Pod %s %s (exit code %d). 
Logs: `kubectl logs pod/%s -n %s`" % ( + pod.metadata.name, container_statuses.state.terminated.reason, + container_statuses.state.terminated.exit_code, pod.metadata.name, pod.metadata.namespace) + assert container_statuses.state.terminated.exit_code == 0, err_msg + + if container_statuses.state.waiting is not None: + wr = container_statuses.state.waiting.reason + assert wr == 'ContainerCreating', "Pod %s %s. Logs: `kubectl logs pod/%s`" % ( + pod.metadata.name, wr, pod.metadata.name) + + # Iterate through conditions, with a delayed return handler + willReturnFalse = False for cond in pod.status.conditions: - if cond.message is not None: - if cond.message == 'ContainersNotReady': - return False - if cond.status != 'False': + if cond.reason is not None: + if cond.reason == 'ContainersNotReady': + self.__logger.debug("ContainersNotReady: " + cond.message) + willReturnFalse = True + elif cond.reason == 'ContainerCannotRun': + self.__logger.debug("ContainerCannotRun: " + cond.message) + willReturnFalse = True + elif cond.reason == 'Unschedulable': + # Check if we're scaling up... + if self.__is_scaling_in_progress(cond, pod_event_messages): + # Check if we fatally failed scaling up... + if self.__has_scaling_failed(cond, pod_event_messages): + # We failed scaling up + self.__logger.info("We failed scaling up for this job") + self.__logger.info("`kubectl describe pod %s -n %s`" % (pod.metadata.name, pod.metadata.namespace)) + raise Exception("This job is unschedulable, please view the kubectl events or describe the pod for more info") + + # Wait if cluster is scaling up + self.__logger.debug("Kubernetes is possibly scaling up or unable to: " + cond.message) + self.__logger.debug("To inspect in another console: `kubectl describe pod %s`" % (pod.metadata.name)) + willReturnFalse = True + else: + self.__logger.info("Kubernetes is unable to schedule this job: " + cond.message) + return False + + elif cond.status != 'False': self.__logger.warning("[ERROR] %s - %s" % (cond.reason, cond.message)) - return False + willReturnFalse = True + + if pod.status.phase == "Running": + self.__logger.debug("Pod %s is running..." % (pod.metadata.name)) + return True + # Catch after parsing all conditions above and if the pod isn't running, since a pod can be healthy and running _after_ failures + elif not willReturnFalse: + return False + + # Verify that the pod started (passed pending, don't parse further if still pending) + if pod.status.phase == "Pending": + self.__logger.debug("Pod %s still pending being scheduled..." % (pod.metadata.name)) + return False + return True def __scale_down_job(self, job_name): @@ -305,9 +417,16 @@ def __get_job_status(self): return "RUNNING" def __delete_job_cascade(self, job): + self.__logger.debug("Deleting Kubernetes job " + job.metadata.name + " upon request") api_response = self.__kubernetes_batch_instance.delete_namespaced_job(job.metadata.name, self.kubernetes_namespace, body={"grace_period_seconds": 0, "propagation_policy": "Background"}) - # TODO: Check status of this request...? 
- print(api_response.status) + # Verify if we deleted properly + if "succeeded': 1" in api_response.status: + self.__logger.debug("Deleting Kubernetes job " + job.metadata.name + " succeeded") + return True + else: + self.__logger.info("Deleting Kubernetes job " + job.metadata.name + " failed: ") + self.__logger.debug(api_response.status) + return False def run(self): self._init_kubernetes() @@ -320,13 +439,17 @@ def run(self): "luigi_name": self.uu_name, } } - + job_spec = { "backoffLimit": self.backoff_limit, "template": { "metadata": { "name": self.uu_name, - "labels": {} + "labels": { + "spawned_by": "luigi", + "luigi_task_id": self.job_uuid, + "luigi_name": self.uu_name, + } }, "spec": self.spec_schema } @@ -345,16 +468,15 @@ def run(self): if "restartPolicy" not in self.spec_schema: job_spec["template"]["spec"]["restartPolicy"] = "Never" # Submit job - self.__logger.info("Submitting Kubernetes Job: " + self.uu_name) + self.__logger.info("Submitting Kubernetes Job: %s in Namespace: %s" % (self.uu_name, job_metadata['namespace'])) body = kubernetes_api.client.V1Job(metadata=job_metadata, spec=job_spec) - + try: api_response = self.__kubernetes_batch_instance.create_namespaced_job(self.kubernetes_namespace, body) self.__logger.info("Successfully Created Kubernetes Job uid: " + api_response.metadata.uid) except kubernetes_api.client.rest.ApiException as e: print("Exception when calling BatchV1Api->create_namespaced_job: %s\n" % e) - # Track the Job (wait while active) self.__logger.info("Start tracking Kubernetes Job: " + self.uu_name) self.__track_job() diff --git a/test/contrib/kubernetes_test.py b/test/contrib/kubernetes_test.py index 66f1241436..1eb8abbb5b 100644 --- a/test/contrib/kubernetes_test.py +++ b/test/contrib/kubernetes_test.py @@ -58,21 +58,31 @@ class SuccessJob(KubernetesJobTask): }] } - class FailJob(KubernetesJobTask): name = "fail" - backoff_limit = 3 spec_schema = { "containers": [{ "name": "fail", - "image": "alpine:3.4", - "command": ["You", "Shall", "Not", "Pass"] + "image": "alpine:3.4", + "command": ["You", "Shall", "Not", "Pass"] }] } - @property - def labels(self): - return {"dummy_label": "dummy_value"} +# TODO TESTING +# class FailContainerCanNotRunInvalidCommandJob(KubernetesJobTask): +# name = "fail" +# backoff_limit = 1 # We will set 1 retry on purpose and check below if it is retrying properly +# spec_schema = { +# "containers": [{ +# "name": "invalidcommand", +# "image": "alpine:3.4", +# "command": ["You", "Shall", "Not", "Pass"] +# }] +# } +# +# @property +# def labels(self): +# return {"dummy_label": "dummy_value"} @pytest.mark.contrib @@ -85,28 +95,105 @@ def test_success_job(self): def test_fail_job(self): fail = FailJob() self.assertRaises(RuntimeError, fail.run) - # TODO: Discuss/consider what to re-implement here, possibly only keep the fail.labels check, possibly ad backoff limit and name check - # # Check for retrials - # kube_api = HTTPClient(KubeConfig.from_file("~/.kube/config")) # assumes minikube - # jobs = Job.objects(kube_api).filter(selector="luigi_task_id=" - # + fail.job_uuid) - # self.assertEqual(len(jobs.response["items"]), 1) - # job = Job(kube_api, jobs.response["items"][0]) - # self.assertTrue("failed" in job.obj["status"]) - # self.assertTrue(job.obj["status"]["failed"] > fail.max_retrials) - # self.assertTrue(job.obj['spec']['template']['metadata']['labels'] == fail.labels()) - - @mock.patch.object(KubernetesJobTask, "_KubernetesJobTask__get_job_status") - @mock.patch.object(KubernetesJobTask, "signal_complete") - def 
test_output(self, mock_signal, mock_job_status): - # mock that the job succeeded - mock_job_status.return_value = "succeeded" - # create a kubernetes job - kubernetes_job = KubernetesJobTask() - # set logger and uu_name due to logging in __track_job() - kubernetes_job._KubernetesJobTask__logger = logger - kubernetes_job.uu_name = "test" - # track the job (bc included in run method) - kubernetes_job._KubernetesJobTask__track_job() - # Make sure successful job signals - self.assertTrue(mock_signal.called) + + # TODO WORK IN PROGRESS... + # def test_fail_container_can_not_run_invalid_command_job(self): + # failure = luigi.run(["FailContainerCanNotRunInvalidCommandJob", "--local-scheduler"]) + # self.assertFalse(failure) + # print("failure") + # print(failure) + # print("failure") + + # def test_fail_container_can_not_run_invalid_command_job(self): + # print('starting...') + # kubernetes_job = FailContainerCanNotRunInvalidCommandJob() + # print('done...') + # try: + # kubernetes_job.run() + # print('we are here') + # assert True is False + # except Exception as e: + # print("exception") + # print(e) + # + # pods = kubernetes_job.__get_pods() + # print('pods') + # print(pods) + # + + + # def test_fail_job(self): + # fail = FailJob() + # self.assertRaises(RuntimeError, fail.run) + + # @mock.patch.object(KubernetesJobTask, "_KubernetesJobTask__get_job_status") + # @mock.patch.object(KubernetesJobTask, "signal_complete") + # def test_output(self, mock_signal, mock_job_status): + # # mock that the job succeeded + # mock_job_status.return_value = "succeeded" + # # create a kubernetes job + # kubernetes_job = KubernetesJobTask() + # # set logger and uu_name due to logging in __track_job() + # kubernetes_job._KubernetesJobTask__logger = logger + # kubernetes_job.uu_name = "test" + # # track the job (bc included in run method) + # kubernetes_job._KubernetesJobTask__track_job() + # # Make sure successful job signals + # self.assertTrue(mock_signal.called) + + + # TODO: + # + # def test_cluster_is_scaling(self): + # kubernetes_job = KubernetesJobTask() + # condition = { + # "reason": "Unschedulable", + # "message": "0/1 nodes are available: 1 Insufficient cpu, 1 Insufficient memory." + # } + # assert kubernetes_job.__is_scaling_in_progress(condition) + # + # condition = { + # "reason": "ContainersNotReady", + # "message": "0/1 nodes are available: 1 Insufficient cpu, 1 Insufficient memory." + # } + # assert kubernetes_job.__is_scaling_in_progress(condition) is False + # + # condition = { + # "reason": "Unschedulable", + # "message": "1/1 nodes are available: 1 Insufficient cpu, 1 Insufficient memory." + # } + # assert kubernetes_job.__is_scaling_in_progress(condition) is True + # + # condition = { + # "reason": "Unschedulable", + # "message": "other message" + # } + # assert kubernetes_job.__is_scaling_in_progress(condition) is False + # + # condition = { + # "message": "other message" + # } + # assert kubernetes_job.__is_scaling_in_progress(condition) is False + # + # @mock.patch.object(KubernetesJobTask, "_KubernetesJobTask__get_job_status") + # @mock.patch.object(KubernetesJobTask, "KubernetesJobTask__get_pods") + # def test_output_when_scaling(self, mock_get_pods, mock_job_status): + # # mock that the job succeeded + # cond1 = { + # "reason": "Unschedulable", + # "message": "1/1 nodes are available: 1 Insufficient cpu, 1 Insufficient memory." 
+ # } + # mock_job_status.return_value = "succeeded" + # mock_get_pods.return_value = [ + # { + # 'conditions': [ + # cond1 + # ] + # } + # ] + # # create a kubernetes job + # kubernetes_job = KubernetesJobTask() + # # set logger and uu_name due to logging in __track_job() + # kubernetes_job._KubernetesJobTask__logger = logger + # kubernetes_job.uu_name = "test" + # self.assertTrue(kubernetes_job._KubernetesJobTask____verify_job_has_started()) From a1ef34564fda4ec89c76394bf986b5b985398949 Mon Sep 17 00:00:00 2001 From: Farley Date: Tue, 31 Aug 2021 02:07:01 +1200 Subject: [PATCH 6/7] Adding better readme to example --- examples/kubernetes.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/examples/kubernetes.py b/examples/kubernetes.py index a745910a1c..6b57e000b2 100644 --- a/examples/kubernetes.py +++ b/examples/kubernetes.py @@ -21,8 +21,9 @@ - Official kubernetes-client python library: ``pip install kubernetes`` - See: https://github.com/kubernetes-client/python/ -- A kubectl configuration that is active and functional - - Can be your kubectl config setup for EKS with `aws eks update-kubeconfig --name clustername` +- Run within a Kubernetes cluster with a ClusterRole granting it access to Kubernetes APIs +- OR a working kubectl configuration that is active and functional + - This can be your kubectl config setup for EKS with `aws eks update-kubeconfig --name clustername` - Or access to any other hosted/managed/self-setup Kubernetes cluster - For devs, a local minikube cluster up and running: http://kubernetes.io/docs/getting-started-guides/minikube/ - Or for devs Docker Desktop has support for minikube: https://www.docker.com/products/docker-desktop @@ -35,9 +36,19 @@ $ python -m luigi --module examples.kubernetes PerlPi --local-scheduler Running this code will create a pi-luigi-uuid kubernetes job within the cluster -pointed to by the default context in "~/.kube/config". It will also auto-detect if this +of whatever your current context is for kubectl. The logic herein will auto-detect if this is running in a Kubernetes cluster and has functional access to the local cluster's APIs -via a ClusterRole. +via a ClusterRole, and if not it will then try to use the kubeconfig and its various environment +variables to support configuring and tweaking kubectl. + +DEPRECATION NOTE: The previous version of this kubernetes contrib module had you configure your kubectl configuration file +as a property in the luigi config file. This was removed in favor of using the kubectl standards. To specify a config, use +the env variable KUBECONFIG. For example: + + .. code:: console + $ KUBECONFIG=~/.kube/my-custom-config PYTHONPATH=. 
luigi --module examples.kubernetes_job PerlPi --local-scheduler + +For more see: https://kubernetes.io/docs/tasks/access-application-cluster/configure-access-multiple-clusters/ """ # import os @@ -49,7 +60,8 @@ class PerlPi(KubernetesJobTask): name = "pi" # The name (prefix) of the job that will be created for identification purposes kubernetes_namespace = "default" # This is the kubernetes namespace you wish to run, if not specified it uses "default" labels = {"job_name": "pi"} # This is to add labels in Kubernetes to help identify it, this is on top of the internal luigi labels - backoff_limit = 2 # This is the number of retries incase there is a pod/node/code failure, default 6 + backoff_limit = 0 # This is the number of retries incase there is a pod/node/code failure, default 6 + active_deadline_seconds = None # This is a "timeout" in seconds, how long to wait for the pods to schedule and execute before failing, default None poll_interval = 1 # To poll more regularly reduce this number, default 5 print_pod_logs_on_exit = False # Set this to True if you wish to see the logs of this run after completion, False by default print_pod_logs_during_run = True # Set this to True if you wish to see the logs of this run while it is running, False by default From af5959e069efccf026b8558e2d954b37e9aa0a4a Mon Sep 17 00:00:00 2001 From: Farley Date: Sun, 29 Jan 2023 12:42:26 +1300 Subject: [PATCH 7/7] fixing code styling --- examples/kubernetes.py | 1 + luigi/contrib/kubernetes.py | 36 +++++++++++++++++---------------- test/contrib/kubernetes_test.py | 20 +++++++++++++----- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/examples/kubernetes.py b/examples/kubernetes.py index 6b57e000b2..062d80b002 100644 --- a/examples/kubernetes.py +++ b/examples/kubernetes.py @@ -55,6 +55,7 @@ # import luigi from luigi.contrib.kubernetes import KubernetesJobTask + class PerlPi(KubernetesJobTask): name = "pi" # The name (prefix) of the job that will be created for identification purposes diff --git a/luigi/contrib/kubernetes.py b/luigi/contrib/kubernetes.py index 365c4b0f44..fd6ca1b0a5 100644 --- a/luigi/contrib/kubernetes.py +++ b/luigi/contrib/kubernetes.py @@ -50,6 +50,7 @@ logger.warning("WARNING: kubernetes is not installed. KubernetesJobTask requires kubernetes") logger.warning(" Please run 'pip install kubernetes' and try again") + class KubernetesJobTask(luigi.Task): __DEFAULT_POLL_INTERVAL = 5 # see __track_job __DEFAULT_POD_CREATION_INTERVAL = 5 @@ -61,12 +62,12 @@ def _init_kubernetes(self): # Configs can be set in Configuration class directly or using helper utility, by default lets try to load in-cluster config # and if that fails cascade into using an kube config try: - kubernetes_api.config.load_incluster_config() - except Exception as e: - try: - kubernetes_api.config.load_kube_config() - except Exception as ex: - raise ex + kubernetes_api.config.load_incluster_config() + except Exception: + try: + kubernetes_api.config.load_kube_config() + except Exception as ex: + raise ex # Create our API instances for Kubernetes self.__kubernetes_api_instance = kubernetes_api.client.CoreV1Api() @@ -199,7 +200,6 @@ def pod_creation_wait_interal(self): """Delay for initial pod creation for just submitted job in seconds. 
Default of 5""" return self.__DEFAULT_POD_CREATION_INTERVAL - def __is_scaling_in_progress(self, condition, messages): """Parses condition and messages, returns true if cluster is currently scaling up""" @@ -218,26 +218,22 @@ def __is_scaling_in_progress(self, condition, messages): if current_nodes <= target_nodes: return True - def __has_scaling_failed(self, condition, messages): """Parses messages from kubectl events to see if scaling up failed""" try: # If we're not unschedulable then stop processing... if condition.reason != 'Unschedulable': return False - # Check our messages for the can't scale up message for message in messages: # Check if our status message is about that we can't scale up, if so exit immediately if "pod didn't trigger scale-up (it wouldn" in message: # We return here immediately on purpose, instead of the delayed return below return True - except: + except Exception: pass - return False - def __track_job(self): """Poll job status while active""" while not self.__verify_job_has_started(): @@ -269,18 +265,21 @@ def signal_complete(self): pass def __get_pods_events(self, pod_name): - api_response = self.__kubernetes_api_instance.list_namespaced_event(namespace=self.kubernetes_namespace, limit=10, field_selector="involvedObject.name=" + pod_name) + api_response = self.__kubernetes_api_instance.list_namespaced_event( + namespace=self.kubernetes_namespace, limit=10, field_selector="involvedObject.name=" + pod_name) output_messages = [] for item in api_response.items: output_messages.append(item.message) return output_messages def __get_pods(self): - api_response = self.__kubernetes_api_instance.list_namespaced_pod(namespace=self.kubernetes_namespace, limit=10, label_selector="job-name=" + self.uu_name) + api_response = self.__kubernetes_api_instance.list_namespaced_pod( + namespace=self.kubernetes_namespace, limit=10, label_selector="job-name=" + self.uu_name) return api_response.items def __get_job(self): - api_response = self.__kubernetes_batch_instance.list_namespaced_job(namespace=self.kubernetes_namespace, limit=10, label_selector="luigi_task_id=" + self.job_uuid) + api_response = self.__kubernetes_batch_instance.list_namespaced_job( + namespace=self.kubernetes_namespace, limit=10, label_selector="luigi_task_id=" + self.job_uuid) assert len(api_response.items) == 1, "Kubernetes job " + self.uu_name + " not found" return api_response.items[0] @@ -418,7 +417,10 @@ def __get_job_status(self): def __delete_job_cascade(self, job): self.__logger.debug("Deleting Kubernetes job " + job.metadata.name + " upon request") - api_response = self.__kubernetes_batch_instance.delete_namespaced_job(job.metadata.name, self.kubernetes_namespace, body={"grace_period_seconds": 0, "propagation_policy": "Background"}) + api_response = self.__kubernetes_batch_instance.delete_namespaced_job( + job.metadata.name, + self.kubernetes_namespace, + body={"grace_period_seconds": 0, "propagation_policy": "Background"}) # Verify if we deleted properly if "succeeded': 1" in api_response.status: self.__logger.debug("Deleting Kubernetes job " + job.metadata.name + " succeeded") @@ -475,7 +477,7 @@ def run(self): api_response = self.__kubernetes_batch_instance.create_namespaced_job(self.kubernetes_namespace, body) self.__logger.info("Successfully Created Kubernetes Job uid: " + api_response.metadata.uid) except kubernetes_api.client.rest.ApiException as e: - print("Exception when calling BatchV1Api->create_namespaced_job: %s\n" % e) + print("Exception when calling BatchV1Api->create_namespaced_job: %s\n" 
% e) # Track the Job (wait while active) self.__logger.info("Start tracking Kubernetes Job: " + self.uu_name) diff --git a/test/contrib/kubernetes_test.py b/test/contrib/kubernetes_test.py index 1eb8abbb5b..13bb5b91da 100644 --- a/test/contrib/kubernetes_test.py +++ b/test/contrib/kubernetes_test.py @@ -35,7 +35,7 @@ import unittest import luigi import logging -import mock +# import mock from luigi.contrib.kubernetes import KubernetesJobTask import pytest @@ -47,6 +47,17 @@ except ImportError: raise unittest.SkipTest('kubernetes is not installed. This test requires kubernetes.') +# Configs can be set in Configuration class directly or using helper utility, by default lets try to load in-cluster config +# and if that fails cascade into using an kube config +kubernetes_core_api = kubernetes_api.client.CoreV1Api() +try: + kubernetes_api.config.load_incluster_config() +except Exception: + try: + kubernetes_api.config.load_kube_config() + except Exception as ex: + raise ex + class SuccessJob(KubernetesJobTask): name = "success" @@ -58,13 +69,14 @@ class SuccessJob(KubernetesJobTask): }] } + class FailJob(KubernetesJobTask): name = "fail" spec_schema = { "containers": [{ "name": "fail", - "image": "alpine:3.4", - "command": ["You", "Shall", "Not", "Pass"] + "image": "alpine:3.4", + "command": ["You", "Shall", "Not", "Pass"] }] } @@ -121,7 +133,6 @@ def test_fail_job(self): # print(pods) # - # def test_fail_job(self): # fail = FailJob() # self.assertRaises(RuntimeError, fail.run) @@ -141,7 +152,6 @@ def test_fail_job(self): # # Make sure successful job signals # self.assertTrue(mock_signal.called) - # TODO: # # def test_cluster_is_scaling(self):
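A rough, untested sketch of how the commented-out scaling test above could be completed once
it is revived; it assumes a SimpleNamespace can stand in for the V1PodCondition object and
calls the name-mangled private helper directly:

    from types import SimpleNamespace

    def test_is_scaling_in_progress_sketch():
        task = SuccessJob()
        # The helper only inspects condition.reason and the pod event messages,
        # so lightweight stand-ins are enough here.
        unschedulable = SimpleNamespace(reason="Unschedulable")
        messages = ["0/1 nodes are available: 1 Insufficient cpu, 1 Insufficient memory."]
        assert task._KubernetesJobTask__is_scaling_in_progress(unschedulable, messages) is True

        # Any other condition reason short-circuits to "not scaling".
        other = SimpleNamespace(reason="ContainersNotReady")
        assert task._KubernetesJobTask__is_scaling_in_progress(other, messages) is False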