diff --git a/demo-notebooks/additional-demos/hf_interactive.ipynb b/demo-notebooks/additional-demos/hf_interactive.ipynb index 3fc228f02..7e2a7180f 100644 --- a/demo-notebooks/additional-demos/hf_interactive.ipynb +++ b/demo-notebooks/additional-demos/hf_interactive.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "c737a768-6e31-4767-a301-60ae932b4ed9", "metadata": {}, "outputs": [], @@ -74,18 +74,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "220b9d85-3a3c-4c0c-aaf2-0d866823dcd8", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Written to: hfgputest.yaml\n" - ] - } - ], + "outputs": [], "source": [ "# Create our cluster and submit\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", @@ -132,52 +124,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "4d0db5f5-22f1-4806-ae7e-a0ee865625c1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────╮\n",
-       "│   🚀 List of CodeFlare  │\n",
-       "│   clusters in queue🚀   │\n",
-       "│ +-----------+---------+ │\n",
-       "│ | Name      | Status  | │\n",
-       "│ +===========+=========+ │\n",
-       "│ | hfgputest | pending | │\n",
-       "│ |           |         | │\n",
-       "│ +-----------+---------+ │\n",
-       "╰─────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "╭─────────────────────────╮\n", - "│ \u001b[3m \u001b[0m\u001b[1;3m 🚀 List of CodeFlare\u001b[0m\u001b[3m \u001b[0m │\n", - "│ \u001b[3m \u001b[0m\u001b[1;3mclusters in queue🚀\u001b[0m\u001b[3m \u001b[0m │\n", - "│ +-----------+---------+ │\n", - "│ |\u001b[1m \u001b[0m\u001b[1mName \u001b[0m\u001b[1m \u001b[0m|\u001b[1m \u001b[0m\u001b[1mStatus \u001b[0m\u001b[1m \u001b[0m| │\n", - "│ +===========+=========+ │\n", - "│ |\u001b[36m \u001b[0m\u001b[36mhfgputest\u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35mpending\u001b[0m\u001b[35m \u001b[0m| │\n", - "│ |\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m| │\n", - "│ +-----------+---------+ │\n", - "╰─────────────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "(False, )" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.status()" ] @@ -212,75 +162,17 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "06a54428-f186-4c27-948e-4eaf9c0e34b5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                  🚀 List of CodeFlare clusters 🚀                  \n",
-       "                                                                    \n",
-       " ╭────────────────────────────────────────────────────────────────╮ \n",
-       " │   Owner                                                        │ \n",
-       " │   hfgputest                                        Active ✅   │ \n",
-       " │                                                                │ \n",
-       " │   URI: ray://hfgputest-head-svc.default.svc:10001              │ \n",
-       " │                                                                │ \n",
-       " │   Dashboard🔗                                                  │ \n",
-       " │                                                                │ \n",
-       " │                      Cluster Resources                         │ \n",
-       " │   ╭─ Workers ──╮  ╭───────── Worker specs(each) ─────────╮     │ \n",
-       " │   │  Min  Max  │  │  Memory      CPU         GPU         │     │ \n",
-       " │   │            │  │                                      │     │ \n",
-       " │   │  1    1    │  │  16G~16G     8           4           │     │ \n",
-       " │   │            │  │                                      │     │ \n",
-       " │   ╰────────────╯  ╰──────────────────────────────────────╯     │ \n",
-       " ╰────────────────────────────────────────────────────────────────╯ \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 List of CodeFlare clusters 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭────────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mOwner\u001b[0m │ \n", - " │ \u001b[1;4mhfgputest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://hfgputest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=552692;ray-dashboard-hfgputest-default.apps.prepfullinstall.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m1 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m16G~16G \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m8 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m4 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰────────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.details()" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "8ac46c87-70f1-4c70-9648-881151665355", "metadata": {}, "outputs": [], @@ -319,18 +211,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "4c458589-5a17-47c6-a8db-625427ae4fe7", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ray cluster is up and running: True\n" - ] - } - ], + "outputs": [], "source": [ "#before proceeding make sure the cluster exists and the uri is not empty\n", "assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n", @@ -377,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "e69994b4-1a13-43fe-b698-2a5374cb941b", "metadata": {}, "outputs": [], @@ -473,972 +357,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "7f0985e9-5e88-4d36-ab38-c3001c13f97c", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading builder script: 100%|██████████| 4.31k/4.31k [00:00<00:00, 5.60MB/s]\n", - "Downloading metadata: 100%|██████████| 2.17k/2.17k [00:00<00:00, 3.13MB/s]\n", - "Downloading readme: 100%|██████████| 7.59k/7.59k [00:00<00:00, 9.75MB/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Downloading and preparing dataset imdb/plain_text to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading data: 0%| | 0.00/84.1M [00:00\n", - "
\n", - "

Ray

\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "\n", - "
Python version:3.8.13
Ray version: 2.1.0
Dashboard:http://10.254.20.41:8265
\n", - "
\n", - "\n" - ], - "text/plain": [ - "ClientContext(dashboard_url='10.254.20.41:8265', python_version='3.8.13', ray_version='2.1.0', ray_commit='23f34d948dae8de9b168667ab27e6cf940b3ae85', protocol_version='2022-10-05', _num_clients=1, _context_to_restore=)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import ray\n", "\n", @@ -208,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "3436eb4a-217c-4109-a3c3-309fda7e2442", "metadata": {}, "outputs": [], @@ -232,72 +153,32 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "5cca1874-2be3-4631-ae48-9adfa45e3af3", "metadata": { - "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-27 19:14:28,222\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000002000000\n", - "2023-06-27 19:14:28,222\tDEBUG worker.py:564 -- Scheduling task heavy_calculation 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x02\\x00\\x00\\x00'\n" - ] - } - ], + "outputs": [], "source": [ "ref = heavy_calculation.remote(3000)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "01172c29-e8bf-41ef-8db5-eccb07906111", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-27 19:14:29,202\tDEBUG worker.py:640 -- Retaining 16310a0f0a45af5cffffffffffffffffffffffff0100000001000000\n", - "2023-06-27 19:14:31,224\tDEBUG worker.py:439 -- Internal retry for get [ClientObjectRef(16310a0f0a45af5cffffffffffffffffffffffff0100000001000000)]\n" - ] - }, - { - "data": { - "text/plain": [ - "1789.4644387076714" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ray.get(ref)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "9e79b547-a457-4232-b77d-19147067b972", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-27 19:14:33,161\tDEBUG dataclient.py:287 -- Got unawaited response connection_cleanup {\n", - "}\n", - "\n", - "2023-06-27 19:14:34,460\tDEBUG dataclient.py:278 -- Shutting down data channel.\n" - ] - } - ], + "outputs": [], "source": [ "ray.cancel(ref)\n", "ray.shutdown()" @@ -305,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "2c198f1f-68bf-43ff-a148-02b5cb000ff2", "metadata": {}, "outputs": [], diff --git a/demo-notebooks/additional-demos/ray_job_client.ipynb b/demo-notebooks/additional-demos/ray_job_client.ipynb index 828b34696..2f43306e9 100644 --- a/demo-notebooks/additional-demos/ray_job_client.ipynb +++ b/demo-notebooks/additional-demos/ray_job_client.ipynb @@ -36,15 +36,15 @@ ] }, { - "cell_type": "markdown", - "id": "18de2d65", - "metadata": {}, - "source": [ - "\n", - "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n", - "If you have your own Ray image which suits your purposes, specify it in image field to override the default image." - ] - }, + "cell_type": "markdown", + "id": "18de2d65", + "metadata": {}, + "source": [ + "\n", + "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n", + "If you have your own Ray image which suits your purposes, specify it in image field to override the default image." + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/demo-notebooks/guided-demos/1_cluster_job_client.ipynb b/demo-notebooks/guided-demos/1_cluster_job_client.ipynb index eb37f70b8..56585e373 100644 --- a/demo-notebooks/guided-demos/1_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/1_cluster_job_client.ipynb @@ -35,16 +35,16 @@ ] }, { - "cell_type": "markdown", - "id": "bc27f84c", - "metadata": {}, - "source": [ - "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", - "\n", - "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n", - "If you have your own Ray image which suits your purposes, specify it in image field to override the default image." - ] - }, + "cell_type": "markdown", + "id": "bc27f84c", + "metadata": {}, + "source": [ + "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", + "\n", + "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n", + "If you have your own Ray image which suits your purposes, specify it in image field to override the default image." + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb index dc2073760..03270e8b7 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", "metadata": {}, "outputs": [], @@ -53,18 +53,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "0f4bc870-091f-4e11-9642-cba145710159", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Written to: raytest.yaml\n" - ] - } - ], + "outputs": [], "source": [ "# Create and configure our cluster object\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", @@ -93,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", "metadata": {}, "outputs": [], @@ -112,125 +104,30 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "3c1b4311-2e61-44c9-8225-87c2db11363d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭───────────────────────╮\n",
-       "│    🚀 Cluster Queue   │\n",
-       "│       Status 🚀       │\n",
-       "│ +---------+---------+ │\n",
-       "│ | Name    | Status  | │\n",
-       "│ +=========+=========+ │\n",
-       "│ | raytest | pending | │\n",
-       "│ |         |         | │\n",
-       "│ +---------+---------+ │\n",
-       "╰───────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "╭───────────────────────╮\n", - "│ \u001b[3m \u001b[0m\u001b[1;3m 🚀 Cluster Queue\u001b[0m\u001b[3m \u001b[0m │\n", - "│ \u001b[3m \u001b[0m\u001b[1;3mStatus 🚀\u001b[0m\u001b[3m \u001b[0m │\n", - "│ +---------+---------+ │\n", - "│ |\u001b[1m \u001b[0m\u001b[1mName \u001b[0m\u001b[1m \u001b[0m|\u001b[1m \u001b[0m\u001b[1mStatus \u001b[0m\u001b[1m \u001b[0m| │\n", - "│ +=========+=========+ │\n", - "│ |\u001b[36m \u001b[0m\u001b[36mraytest\u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35mpending\u001b[0m\u001b[35m \u001b[0m| │\n", - "│ |\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m| │\n", - "│ +---------+---------+ │\n", - "╰───────────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "(, False)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.status()" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "a99d5aff", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for requested resources to be set up...\n", - "Requested cluster up and running!\n" - ] - } - ], + "outputs": [], "source": [ "cluster.wait_ready()" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "df71c1ed", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                  🚀 CodeFlare Cluster Status 🚀                  \n",
-       "                                                                  \n",
-       " ╭──────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                       │ \n",
-       " │   raytest                                        Active ✅   │ \n",
-       " │                                                              │ \n",
-       " │   URI: ray://raytest-head-svc.default.svc:10001              │ \n",
-       " │                                                              │ \n",
-       " │   Dashboard🔗                                                │ \n",
-       " │                                                              │ \n",
-       " ╰──────────────────────────────────────────────────────────────╯ \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Status 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭──────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4mraytest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://raytest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=630217;ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " ╰──────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "(, True)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.status()" ] @@ -245,68 +142,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                  🚀 CodeFlare Cluster Details 🚀                  \n",
-       "                                                                   \n",
-       " ╭───────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                        │ \n",
-       " │   raytest                                        Active ✅    │ \n",
-       " │                                                               │ \n",
-       " │   URI: ray://raytest-head-svc.default.svc:10001               │ \n",
-       " │                                                               │ \n",
-       " │   Dashboard🔗                                                 │ \n",
-       " │                                                               │ \n",
-       " │                       Cluster Resources                       │ \n",
-       " │   ╭── Workers ──╮  ╭───────── Worker specs(each) ─────────╮   │ \n",
-       " │   │  # Workers  │  │  Memory      CPU         GPU         │   │ \n",
-       " │   │             │  │                                      │   │ \n",
-       " │   │  2          │  │  4~4         1           0           │   │ \n",
-       " │   │             │  │                                      │   │ \n",
-       " │   ╰─────────────╯  ╰──────────────────────────────────────╯   │ \n",
-       " ╰───────────────────────────────────────────────────────────────╯ \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭───────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4mraytest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://raytest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=623965;http://ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m4~4 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m0 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰───────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "RayCluster(name='raytest', status=, workers=2, worker_mem_min=4, worker_mem_max=4, worker_cpu=1, worker_gpu=0, namespace='default', dashboard='http://ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.details()" ] @@ -321,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", "metadata": {}, "outputs": [], diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb index 903be9f00..1a5e77f4e 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb @@ -35,16 +35,16 @@ ] }, { - "cell_type": "markdown", - "id": "bc27f84c", - "metadata": {}, - "source": [ - "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", - "\n", - "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n", - "If you have your own Ray image which suits your purposes, specify it in image field to override the default image." - ] - }, + "cell_type": "markdown", + "id": "bc27f84c", + "metadata": {}, + "source": [ + "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", + "\n", + "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n", + "If you have your own Ray image which suits your purposes, specify it in image field to override the default image." + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb index eb6c8977b..08eaf0b81 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", "metadata": {}, "outputs": [], @@ -50,18 +50,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "0f4bc870-091f-4e11-9642-cba145710159", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Written to: interactivetest.yaml\n" - ] - } - ], + "outputs": [], "source": [ "# Create and configure our cluster object\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", @@ -83,19 +75,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for requested resources to be set up...\n", - "Requested cluster up and running!\n" - ] - } - ], + "outputs": [], "source": [ "# Bring up the cluster\n", "cluster.up()\n", @@ -104,68 +87,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "df71c1ed", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                      🚀 CodeFlare Cluster Details 🚀                     \n",
-       "                                                                          \n",
-       " ╭──────────────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                               │ \n",
-       " │   interactivetest                                        Active ✅   │ \n",
-       " │                                                                      │ \n",
-       " │   URI: ray://interactivetest-head-svc.default.svc:10001              │ \n",
-       " │                                                                      │ \n",
-       " │   Dashboard🔗                                                        │ \n",
-       " │                                                                      │ \n",
-       " │                       Cluster Resources                              │ \n",
-       " │   ╭── Workers ──╮  ╭───────── Worker specs(each) ─────────╮          │ \n",
-       " │   │  # Workers  │  │  Memory      CPU         GPU         │          │ \n",
-       " │   │             │  │                                      │          │ \n",
-       " │   │  2          │  │  8~8         2           1           │          │ \n",
-       " │   │             │  │                                      │          │ \n",
-       " │   ╰─────────────╯  ╰──────────────────────────────────────╯          │ \n",
-       " ╰──────────────────────────────────────────────────────────────────────╯ \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭──────────────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4minteractivetest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://interactivetest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=970589;http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰──────────────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "RayCluster(name='interactivetest', status=, workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.details()" ] @@ -182,19 +107,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "c1719bca", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\n", - "ray://interactivetest-head-svc.default.svc:10001\n" - ] - } - ], + "outputs": [], "source": [ "ray_dashboard_uri = cluster.cluster_dashboard_uri()\n", "ray_cluster_uri = cluster.cluster_uri()\n", @@ -225,18 +141,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "300146dc", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ray cluster is up and running: True\n" - ] - } - ], + "outputs": [], "source": [ "#before proceeding make sure the cluster exists and the uri is not empty\n", "assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n", @@ -266,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "1b36e0d9", "metadata": {}, "outputs": [], @@ -362,1111 +270,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "5901d958", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading builder script: 100%|██████████| 4.31k/4.31k [00:00<00:00, 20.9MB/s]\n", - "Downloading metadata: 100%|██████████| 2.17k/2.17k [00:00<00:00, 14.1MB/s]\n", - "Downloading readme: 100%|██████████| 7.59k/7.59k [00:00<00:00, 22.9MB/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Downloading and preparing dataset imdb/plain_text to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading data: 0%| | 0.00/84.1M [00:00 AllToAllOperator[RandomizeBlockOrder]\n", - "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,957\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,958\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,969\tINFO streaming_executor.py:149 -- Shutting down .\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,912\tINFO config.py:86 -- Setting up process group for: env:// [rank=0, world_size=2]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:01 (running for 00:00:10.18)\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n", - "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:91 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[RandomizeBlockOrder]\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,274\tINFO streaming_executor.py:149 -- Shutting down .\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:91 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[RandomizeBlockOrder]\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,263\tINFO streaming_executor.py:149 -- Shutting down .\n", - "Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 151kB/s]\n", - "Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 146kB/s]\n", - "Downloading model.safetensors: 0%| | 0.00/268M [00:00