Skip to content

Commit

Permalink
Add UI table with Cluster specs and action buttons (#681)
Browse files Browse the repository at this point in the history
* Add UI table with RayCluster specs and action buttons

* Update cpu and mem names from UI table

* Merge requests-limits into single column in UI table

* Enhance notebook outputs/display on button clicks

* Refactor and move UI table to widgets.py file

* Add unit tests for UI table functions

* Add timeout and interval parameters to _delete_cluster function

* Pre-select the cluster if it exists, suppress widgets and outputs on creation of the Cluster object, and fix bugs

* Add UI table to regression and functionality tests

* Update codeflare_sdk.egg-info

* Fix to hide toolbar before capturing snapshots for UI notebook tests

* Add head comments to functions and add num_workers to data frame

* Reformat for pre-commit checks

* Revert codeflare_sdk.egg-info name
  • Loading branch information
ChristianZaccaria authored Sep 27, 2024
1 parent 80fabe3 commit c2eaa15
Show file tree
Hide file tree
Showing 13 changed files with 756 additions and 43 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ui_notebooks_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ jobs:
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 3_widget_example.ipynb > 3_widget_example.ipynb.tmp && mv 3_widget_example.ipynb.tmp 3_widget_example.ipynb
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 3_widget_example.ipynb > 3_widget_example.ipynb.tmp && mv 3_widget_example.ipynb.tmp 3_widget_example.ipynb
# Set an explicit namespace, as the SDK (currently) needs it to resolve local queues
sed -i "s/head_memory_limits=2,/head_memory_limits=2, namespace='default',/" 3_widget_example.ipynb
sed -i "s|head_memory_limits=2,|head_memory_limits=2, namespace='default', image='quay.io/modh/ray:2.35.0-py39-cu121',|" 3_widget_example.ipynb
sed -i "s|view_clusters()|view_clusters('default')|" 3_widget_example.ipynb
working-directory: demo-notebooks/guided-demos

- name: Run UI notebook tests
Expand Down
20 changes: 15 additions & 5 deletions demo-notebooks/guided-demos/3_widget_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"outputs": [],
"source": [
"# Import pieces from codeflare-sdk\n",
"from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication"
"from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication, view_clusters"
]
},
{
Expand Down Expand Up @@ -61,7 +61,7 @@
"# Create and configure our cluster object\n",
"# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n",
"cluster = Cluster(ClusterConfiguration(\n",
" name='raytest', \n",
" name='raytest',\n",
" head_cpu_requests='500m',\n",
" head_cpu_limits='500m',\n",
" head_memory_requests=2,\n",
Expand All @@ -73,12 +73,22 @@
" worker_cpu_limits=1,\n",
" worker_memory_requests=2,\n",
" worker_memory_limits=2,\n",
" # image=\"\", # Optional Field \n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # image=\"\", # Optional Field\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources\n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3de6403c",
"metadata": {},
"outputs": [],
"source": [
"view_clusters()"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -106,7 +116,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
"version": "3.9.18"
},
"vscode": {
"interpreter": {
Expand Down
2 changes: 2 additions & 0 deletions src/codeflare_sdk.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ src/codeflare_sdk/cluster/awload.py
src/codeflare_sdk/cluster/cluster.py
src/codeflare_sdk/cluster/config.py
src/codeflare_sdk/cluster/model.py
src/codeflare_sdk/cluster/widgets.py
src/codeflare_sdk/job/__init__.py
src/codeflare_sdk/job/ray_jobs.py
src/codeflare_sdk/utils/__init__.py
src/codeflare_sdk/utils/demos.py
src/codeflare_sdk/utils/generate_cert.py
src/codeflare_sdk/utils/generate_yaml.py
src/codeflare_sdk/utils/kube_api_helpers.py
Expand Down
1 change: 1 addition & 0 deletions src/codeflare_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
get_cluster,
list_all_queued,
list_all_clusters,
view_clusters,
)

from .job import RayJobClient
Expand Down
4 changes: 4 additions & 0 deletions src/codeflare_sdk/cluster/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,8 @@
list_all_clusters,
)

from .widgets import (
view_clusters,
)

from .awload import AWManager
17 changes: 11 additions & 6 deletions src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"""

import re
import subprocess
from time import sleep
from typing import List, Optional, Tuple, Dict

Expand Down Expand Up @@ -862,16 +863,19 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
name=rc["metadata"]["name"],
status=status,
# for now we are not using autoscaling, so reporting the configured replica count is fine
workers=rc["spec"]["workerGroupSpecs"][0]["replicas"],
num_workers=rc["spec"]["workerGroupSpecs"][0]["replicas"],
worker_mem_limits=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
"containers"
][0]["resources"]["limits"]["memory"],
worker_mem_requests=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
"containers"
][0]["resources"]["requests"]["memory"],
worker_cpu=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][
0
]["resources"]["limits"]["cpu"],
worker_cpu_requests=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
"containers"
][0]["resources"]["requests"]["cpu"],
worker_cpu_limits=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
"containers"
][0]["resources"]["limits"]["cpu"],
worker_extended_resources=worker_extended_resources,
namespace=rc["metadata"]["namespace"],
head_cpu_requests=rc["spec"]["headGroupSpec"]["template"]["spec"]["containers"][
Expand Down Expand Up @@ -907,10 +911,11 @@ def _copy_to_ray(cluster: Cluster) -> RayCluster:
ray = RayCluster(
name=cluster.config.name,
status=cluster.status(print_to_console=False)[0],
workers=cluster.config.num_workers,
num_workers=cluster.config.num_workers,
worker_mem_requests=cluster.config.worker_memory_requests,
worker_mem_limits=cluster.config.worker_memory_limits,
worker_cpu=cluster.config.worker_cpu_requests,
worker_cpu_requests=cluster.config.worker_cpu_requests,
worker_cpu_limits=cluster.config.worker_cpu_limits,
worker_extended_resources=cluster.config.worker_extended_resource_requests,
namespace=cluster.config.namespace,
dashboard=cluster.cluster_dashboard_uri(),
Expand Down
6 changes: 4 additions & 2 deletions src/codeflare_sdk/cluster/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from dataclasses import dataclass, field
from enum import Enum
import typing
from typing import Union


class RayClusterStatus(Enum):
Expand Down Expand Up @@ -77,10 +78,11 @@ class RayCluster:
head_cpu_limits: int
head_mem_requests: str
head_mem_limits: str
workers: int
num_workers: int
worker_mem_requests: str
worker_mem_limits: str
worker_cpu: int
worker_cpu_requests: Union[int, str]
worker_cpu_limits: Union[int, str]
namespace: str
dashboard: str
worker_extended_resources: typing.Dict[str, int] = field(default_factory=dict)
Expand Down
Loading

0 comments on commit c2eaa15

Please sign in to comment.