diff --git a/docs/cluster/auth.html b/docs/cluster/auth.html
index dbec66ace..1beb27c77 100644
--- a/docs/cluster/auth.html
+++ b/docs/cluster/auth.html
@@ -54,6 +54,8 @@

Module codeflare_sdk.cluster.auth

import abc
from kubernetes import client, config
+import os
+from ..utils.kube_api_helpers import _kube_api_error_handling

global api_client
api_client = None
@@ -194,8 +196,23 @@

Module codeflare_sdk.cluster.auth

""" global config_path global api_client + home_directory = os.path.expanduser("~") if config_path == None and api_client == None: - config.load_kube_config() + if os.path.isfile("%s/.kube/config" % home_directory): + try: + config.load_kube_config() + except Exception as e: # pragma: no cover + _kube_api_error_handling(e) + elif "KUBERNETES_PORT" in os.environ: + try: + config.load_incluster_config() + except Exception as e: # pragma: no cover + _kube_api_error_handling(e) + else: + raise PermissionError( + "Action not permitted, have you put in correct/up-to-date auth credentials?" + ) + if config_path != None and api_client == None: return config_path @@ -253,8 +270,23 @@

Functions

""" global config_path global api_client + home_directory = os.path.expanduser("~") if config_path == None and api_client == None: - config.load_kube_config() + if os.path.isfile("%s/.kube/config" % home_directory): + try: + config.load_kube_config() + except Exception as e: # pragma: no cover + _kube_api_error_handling(e) + elif "KUBERNETES_PORT" in os.environ: + try: + config.load_incluster_config() + except Exception as e: # pragma: no cover + _kube_api_error_handling(e) + else: + raise PermissionError( + "Action not permitted, have you put in correct/up-to-date auth credentials?" + ) + if config_path != None and api_client == None: return config_path diff --git a/docs/cluster/cluster.html b/docs/cluster/cluster.html index e8205b425..11306f306 100644 --- a/docs/cluster/cluster.html +++ b/docs/cluster/cluster.html @@ -93,6 +93,39 @@
diff --git a/docs/cluster/cluster.html b/docs/cluster/cluster.html
index e8205b425..11306f306 100644
--- a/docs/cluster/cluster.html
+++ b/docs/cluster/cluster.html
@@ -93,6 +93,39 @@

Module codeflare_sdk.cluster.cluster

self.app_wrapper_yaml = self.create_app_wrapper()
self.app_wrapper_name = self.app_wrapper_yaml.split(".")[0]
+
+def evaluate_config(self):
+    if not self.evaluate_dispatch_priority():
+        return False
+    else:
+        return True
+
+def evaluate_dispatch_priority(self):
+    priority_class = self.config.dispatch_priority
+    if priority_class is None:
+        return True
+    else:
+        try:
+            config_check()
+            api_instance = client.CustomObjectsApi(api_config_handler())
+            priority_classes = api_instance.list_cluster_custom_object(
+                group="scheduling.k8s.io",
+                version="v1",
+                plural="priorityclasses",
+            )
+            available_priority_classes = [
+                i["metadata"]["name"] for i in priority_classes["items"]
+            ]
+        except Exception as e:  # pragma: no cover
+            return _kube_api_error_handling(e)
+
+        if priority_class in available_priority_classes:
+            return True
+        else:
+            print(
+                f"Priority class {priority_class} is not available in the cluster"
+            )
+            return False
+
def create_app_wrapper(self):
    """
    Called upon cluster object creation, creates an AppWrapper yaml based on
@@ -123,6 +156,7 @@

Module codeflare_sdk.cluster.cluster

env = self.config.envs
local_interactive = self.config.local_interactive
image_pull_secrets = self.config.image_pull_secrets
+dispatch_priority = self.config.dispatch_priority
return generate_appwrapper(
    name=name,
    namespace=namespace,
@@ -139,6 +173,7 @@

Module codeflare_sdk.cluster.cluster

    env=env,
    local_interactive=local_interactive,
    image_pull_secrets=image_pull_secrets,
+    dispatch_priority=dispatch_priority,
)

# creates a new cluster with the provided or default spec
@@ -147,6 +182,12 @@

Module codeflare_sdk.cluster.cluster

Applies the AppWrapper yaml, pushing the resource request onto the MCAD queue.
"""
+
+# Before attempting to bring up the cluster let's evaluate the ClusterConfig
+if not self.evaluate_config():
+    print("Invalid Cluster Configuration")
+    return False
+
namespace = self.config.namespace
try:
    config_check()
@@ -787,6 +828,39 @@

Classes

self.app_wrapper_yaml = self.create_app_wrapper()
self.app_wrapper_name = self.app_wrapper_yaml.split(".")[0]
+
+def evaluate_config(self):
+    if not self.evaluate_dispatch_priority():
+        return False
+    else:
+        return True
+
+def evaluate_dispatch_priority(self):
+    priority_class = self.config.dispatch_priority
+    if priority_class is None:
+        return True
+    else:
+        try:
+            config_check()
+            api_instance = client.CustomObjectsApi(api_config_handler())
+            priority_classes = api_instance.list_cluster_custom_object(
+                group="scheduling.k8s.io",
+                version="v1",
+                plural="priorityclasses",
+            )
+            available_priority_classes = [
+                i["metadata"]["name"] for i in priority_classes["items"]
+            ]
+        except Exception as e:  # pragma: no cover
+            return _kube_api_error_handling(e)
+
+        if priority_class in available_priority_classes:
+            return True
+        else:
+            print(
+                f"Priority class {priority_class} is not available in the cluster"
+            )
+            return False
+
def create_app_wrapper(self):
    """
    Called upon cluster object creation, creates an AppWrapper yaml based on
@@ -817,6 +891,7 @@

Classes

env = self.config.envs
local_interactive = self.config.local_interactive
image_pull_secrets = self.config.image_pull_secrets
+dispatch_priority = self.config.dispatch_priority
return generate_appwrapper(
    name=name,
    namespace=namespace,
@@ -833,6 +908,7 @@

Classes

    env=env,
    local_interactive=local_interactive,
    image_pull_secrets=image_pull_secrets,
+    dispatch_priority=dispatch_priority,
)

# creates a new cluster with the provided or default spec
@@ -841,6 +917,12 @@

Classes

Applies the AppWrapper yaml, pushing the resource request onto the MCAD queue.
"""
+
+# Before attempting to bring up the cluster let's evaluate the ClusterConfig
+if not self.evaluate_config():
+    print("Invalid Cluster Configuration")
+    return False
+
namespace = self.config.namespace
try:
    config_check()
@@ -1181,6 +1263,7 @@

Methods

env = self.config.envs
local_interactive = self.config.local_interactive
image_pull_secrets = self.config.image_pull_secrets
+dispatch_priority = self.config.dispatch_priority
return generate_appwrapper(
    name=name,
    namespace=namespace,
@@ -1197,6 +1280,7 @@

Methods

    env=env,
    local_interactive=local_interactive,
    image_pull_secrets=image_pull_secrets,
+    dispatch_priority=dispatch_priority,
)
@@ -1246,6 +1330,59 @@

Methods

return _kube_api_error_handling(e)
+
+def evaluate_config(self)
+
+Expand source code
+
+def evaluate_config(self):
+    if not self.evaluate_dispatch_priority():
+        return False
+    else:
+        return True
+
+def evaluate_dispatch_priority(self)
+
+Expand source code
+
+def evaluate_dispatch_priority(self):
+    priority_class = self.config.dispatch_priority
+    if priority_class is None:
+        return True
+    else:
+        try:
+            config_check()
+            api_instance = client.CustomObjectsApi(api_config_handler())
+            priority_classes = api_instance.list_cluster_custom_object(
+                group="scheduling.k8s.io",
+                version="v1",
+                plural="priorityclasses",
+            )
+            available_priority_classes = [
+                i["metadata"]["name"] for i in priority_classes["items"]
+            ]
+        except Exception as e:  # pragma: no cover
+            return _kube_api_error_handling(e)
+
+        if priority_class in available_priority_classes:
+            return True
+        else:
+            print(
+                f"Priority class {priority_class} is not available in the cluster"
+            )
+            return False
+
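The availability check above lists PriorityClass objects through the generic CustomObjectsApi. The same lookup can be reproduced standalone (illustrative only, not part of this changeset; it uses the default client config rather than the SDK's api_config_handler(), and the class name "high" is hypothetical):

    from kubernetes import client, config

    config.load_kube_config()
    api = client.CustomObjectsApi()
    priority_classes = api.list_cluster_custom_object(
        group="scheduling.k8s.io", version="v1", plural="priorityclasses"
    )
    available = [i["metadata"]["name"] for i in priority_classes["items"]]
    print("high" in available)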
def from_k8_cluster_object(rc)
@@ -1486,6 +1623,12 @@

Methods

Applies the AppWrapper yaml, pushing the resource request onto the MCAD queue.
"""
+
+# Before attempting to bring up the cluster let's evaluate the ClusterConfig
+if not self.evaluate_config():
+    print("Invalid Cluster Configuration")
+    return False
+
namespace = self.config.namespace
try:
    config_check()
@@ -1570,6 +1713,8 @@

• create_app_wrapper
• details
• down
+ • evaluate_config
+ • evaluate_dispatch_priority
• from_k8_cluster_object
• job_logs
• job_status
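End to end, the new gate surfaces to users like this (a hypothetical session, not part of this changeset; "high" must name an existing PriorityClass in the cluster):

    from codeflare_sdk.cluster.cluster import Cluster
    from codeflare_sdk.cluster.config import ClusterConfiguration

    cluster = Cluster(ClusterConfiguration(
        name="raytest",
        namespace="default",
        num_workers=2,
        dispatch_priority="high",
    ))
    # up() now runs evaluate_config() first; if the PriorityClass is unknown
    # it prints "Invalid Cluster Configuration" and returns False instead of
    # submitting the AppWrapper to the MCAD queue.
    cluster.up()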
diff --git a/docs/cluster/config.html b/docs/cluster/config.html
index 168d252b2..0575c01c8 100644
--- a/docs/cluster/config.html
+++ b/docs/cluster/config.html
@@ -78,7 +78,8 @@

    Module codeflare_sdk.cluster.config

envs: dict = field(default_factory=dict)
image: str = "quay.io/project-codeflare/ray:2.5.0-py38-cu116"
local_interactive: bool = False
-image_pull_secrets: list = field(default_factory=list)
+image_pull_secrets: list = field(default_factory=list)
+dispatch_priority: str = None
    @@ -92,7 +93,7 @@

    Classes

class ClusterConfiguration
-(name: str, namespace: str = None, head_info: list = <factory>, machine_types: list = <factory>, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, envs: dict = <factory>, image: str = 'quay.io/project-codeflare/ray:2.5.0-py38-cu116', local_interactive: bool = False, image_pull_secrets: list = <factory>)
+(name: str, namespace: str = None, head_info: list = <factory>, machine_types: list = <factory>, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, envs: dict = <factory>, image: str = 'quay.io/project-codeflare/ray:2.5.0-py38-cu116', local_interactive: bool = False, image_pull_secrets: list = <factory>, dispatch_priority: str = None)

This dataclass is used to specify resource requirements and other details, and
@@ -122,10 +123,15 @@

    Classes

envs: dict = field(default_factory=dict)
image: str = "quay.io/project-codeflare/ray:2.5.0-py38-cu116"
local_interactive: bool = False
-image_pull_secrets: list = field(default_factory=list)
+image_pull_secrets: list = field(default_factory=list)
+dispatch_priority: str = None

    Class variables

+var dispatch_priority : str
+
var envs : dict
@@ -211,6 +217,7 @@

    Index

• ClusterConfiguration
  + • dispatch_priority
  • envs
  • head_info
  • image
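The field is optional; when left at its default, the priority gate passes and no priorityClassName is stamped into the pod templates (a minimal sketch, not part of this changeset):

    from codeflare_sdk.cluster.config import ClusterConfiguration

    cfg = ClusterConfiguration(name="raytest")
    assert cfg.dispatch_priority is None  # default: no PriorityClass gating or stamping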
diff --git a/docs/utils/generate_yaml.html b/docs/utils/generate_yaml.html
index f8e6dcb97..fe0238991 100644
--- a/docs/utils/generate_yaml.html
+++ b/docs/utils/generate_yaml.html
@@ -120,6 +120,14 @@

      Module codeflare_sdk.utils.generate_yaml

metadata.pop("labels")
+
+def update_priority(item, dispatch_priority):
+    if dispatch_priority is not None:
+        head = item.get("generictemplate").get("spec").get("headGroupSpec")
+        worker = item.get("generictemplate").get("spec").get("workerGroupSpecs")[0]
+        head["template"]["spec"]["priorityClassName"] = dispatch_priority
+        worker["template"]["spec"]["priorityClassName"] = dispatch_priority
+
+
def update_custompodresources(
    item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
):
@@ -206,6 +214,11 @@

      Module codeflare_sdk.utils.generate_yaml

limits["nvidia.com/gpu"] = gpu
+
+def update_scheduling_spec(yaml, workers):
+    spec = yaml.get("spec")
+    spec["schedulingSpec"]["minAvailable"] = workers + 1
+
+
def update_nodes(
    item,
    appwrapper_name,
@@ -377,6 +390,7 @@

      Module codeflare_sdk.utils.generate_yaml

    env,
    local_interactive: bool,
    image_pull_secrets: list,
+    dispatch_priority: str,
):
    user_yaml = read_template(template)
    appwrapper_name, cluster_name = gen_names(name)
@@ -385,6 +399,8 @@

      Module codeflare_sdk.utils.generate_yaml

route_item = resources["resources"].get("GenericItems")[1]
update_names(user_yaml, item, appwrapper_name, cluster_name, namespace)
update_labels(user_yaml, instascale, instance_types)
+update_priority(item, dispatch_priority)
+update_scheduling_spec(user_yaml, workers)
update_custompodresources(
    item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
)
@@ -557,7 +573,7 @@

      Functions

-def generate_appwrapper(name: str, namespace: str, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, instance_types: list, env, local_interactive: bool, image_pull_secrets: list)
+def generate_appwrapper(name: str, namespace: str, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, instance_types: list, env, local_interactive: bool, image_pull_secrets: list, dispatch_priority: str)
    @@ -581,6 +597,7 @@

    Functions

    env,
    local_interactive: bool,
    image_pull_secrets: list,
+    dispatch_priority: str,
):
    user_yaml = read_template(template)
    appwrapper_name, cluster_name = gen_names(name)
@@ -589,6 +606,8 @@

    Functions

route_item = resources["resources"].get("GenericItems")[1]
update_names(user_yaml, item, appwrapper_name, cluster_name, namespace)
update_labels(user_yaml, instascale, instance_types)
+update_priority(item, dispatch_priority)
+update_scheduling_spec(user_yaml, workers)
update_custompodresources(
    item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
)
@@ -878,6 +897,23 @@

    Functions

update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu)
+
+def update_priority(item, dispatch_priority)
+
+Expand source code
+
+def update_priority(item, dispatch_priority):
+    if dispatch_priority is not None:
+        head = item.get("generictemplate").get("spec").get("headGroupSpec")
+        worker = item.get("generictemplate").get("spec").get("workerGroupSpecs")[0]
+        head["template"]["spec"]["priorityClassName"] = dispatch_priority
+        worker["template"]["spec"]["priorityClassName"] = dispatch_priority
+
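A toy GenericItem showing the exact paths update_priority() mutates (illustrative only, not part of this changeset; field values are made up):

    from codeflare_sdk.utils.generate_yaml import update_priority

    item = {
        "generictemplate": {
            "spec": {
                "headGroupSpec": {"template": {"spec": {}}},
                "workerGroupSpecs": [{"template": {"spec": {}}}],
            }
        }
    }
    update_priority(item, "high")
    # Both Ray pod templates now carry the PriorityClass:
    head_spec = item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"]
    assert head_spec["priorityClassName"] == "high"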
    def update_rayclient_route(route_item, cluster_name, namespace)
    @@ -920,6 +956,20 @@

    Functions

limits["nvidia.com/gpu"] = gpu
+
+def update_scheduling_spec(yaml, workers)
+
+Expand source code
+
+def update_scheduling_spec(yaml, workers):
+    spec = yaml.get("spec")
+    spec["schedulingSpec"]["minAvailable"] = workers + 1
+
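The workers + 1 counts the Ray head pod plus every worker, so MCAD waits until the whole cluster can be placed at once (a toy run, not part of this changeset):

    from codeflare_sdk.utils.generate_yaml import update_scheduling_spec

    user_yaml = {"spec": {"schedulingSpec": {}}}
    update_scheduling_spec(user_yaml, workers=2)
    assert user_yaml["spec"]["schedulingSpec"]["minAvailable"] == 3  # 1 head + 2 workers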
    def write_user_appwrapper(user_yaml, output_file_name)
    @@ -968,8 +1018,10 @@

    Index

• update_labels
• update_names
• update_nodes
+ • update_priority
• update_rayclient_route
• update_resources
+ • update_scheduling_spec
• write_user_appwrapper
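Note that the new dispatch_priority parameter of generate_appwrapper() has no default, so direct callers must now supply it, if only as None. A hypothetical call (illustrative only; the template path and PriorityClass name are made up):

    from codeflare_sdk.utils.generate_yaml import generate_appwrapper

    generate_appwrapper(
        name="raytest", namespace="default",
        min_cpu=1, max_cpu=1, min_memory=2, max_memory=2, gpu=0, workers=2,
        template="src/codeflare_sdk/templates/base-template.yaml",
        image="quay.io/project-codeflare/ray:2.5.0-py38-cu116",
        instascale=False, instance_types=[], env={},
        local_interactive=False, image_pull_secrets=[],
        dispatch_priority="high",
    )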