diff --git a/demo-notebooks/additional-demos/hf_interactive.ipynb b/demo-notebooks/additional-demos/hf_interactive.ipynb
index 3fc228f02..7e2a7180f 100644
--- a/demo-notebooks/additional-demos/hf_interactive.ipynb
+++ b/demo-notebooks/additional-demos/hf_interactive.ipynb
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "c737a768-6e31-4767-a301-60ae932b4ed9",
"metadata": {},
"outputs": [],
@@ -74,18 +74,10 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "220b9d85-3a3c-4c0c-aaf2-0d866823dcd8",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Written to: hfgputest.yaml\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Create our cluster and submit\n",
"# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n",
@@ -132,52 +124,10 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "4d0db5f5-22f1-4806-ae7e-a0ee865625c1",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
╭─────────────────────────╮\n",
- "│ 🚀 List of CodeFlare │\n",
- "│ clusters in queue🚀 │\n",
- "│ +-----------+---------+ │\n",
- "│ | Name | Status | │\n",
- "│ +===========+=========+ │\n",
- "│ | hfgputest | pending | │\n",
- "│ | | | │\n",
- "│ +-----------+---------+ │\n",
- "╰─────────────────────────╯\n",
- "
\n"
- ],
- "text/plain": [
- "╭─────────────────────────╮\n",
- "│ \u001b[3m \u001b[0m\u001b[1;3m 🚀 List of CodeFlare\u001b[0m\u001b[3m \u001b[0m │\n",
- "│ \u001b[3m \u001b[0m\u001b[1;3mclusters in queue🚀\u001b[0m\u001b[3m \u001b[0m │\n",
- "│ +-----------+---------+ │\n",
- "│ |\u001b[1m \u001b[0m\u001b[1mName \u001b[0m\u001b[1m \u001b[0m|\u001b[1m \u001b[0m\u001b[1mStatus \u001b[0m\u001b[1m \u001b[0m| │\n",
- "│ +===========+=========+ │\n",
- "│ |\u001b[36m \u001b[0m\u001b[36mhfgputest\u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35mpending\u001b[0m\u001b[35m \u001b[0m| │\n",
- "│ |\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m| │\n",
- "│ +-----------+---------+ │\n",
- "╰─────────────────────────╯\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "(False, )"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"cluster.status()"
]
@@ -212,75 +162,17 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "06a54428-f186-4c27-948e-4eaf9c0e34b5",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " 🚀 List of CodeFlare clusters 🚀 \n",
- " \n",
- " ╭────────────────────────────────────────────────────────────────╮ \n",
- " │ Owner │ \n",
- " │ hfgputest Active ✅ │ \n",
- " │ │ \n",
- " │ URI: ray://hfgputest-head-svc.default.svc:10001 │ \n",
- " │ │ \n",
- " │ Dashboard🔗 │ \n",
- " │ │ \n",
- " │ Cluster Resources │ \n",
- " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
- " │ │ Min Max │ │ Memory CPU GPU │ │ \n",
- " │ │ │ │ │ │ \n",
- " │ │ 1 1 │ │ 16G~16G 8 4 │ │ \n",
- " │ │ │ │ │ │ \n",
- " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n",
- " ╰────────────────────────────────────────────────────────────────╯ \n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[3m \u001b[0m\u001b[1;3m 🚀 List of CodeFlare clusters 🚀\u001b[0m\u001b[3m \u001b[0m\n",
- "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n",
- " ╭────────────────────────────────────────────────────────────────╮ \n",
- " │ \u001b[1;37;42mOwner\u001b[0m │ \n",
- " │ \u001b[1;4mhfgputest\u001b[0m Active ✅ │ \n",
- " │ │ \n",
- " │ \u001b[1mURI:\u001b[0m ray://hfgputest-head-svc.default.svc:10001 │ \n",
- " │ │ \n",
- " │ \u001b]8;id=552692;ray-dashboard-hfgputest-default.apps.prepfullinstall.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n",
- " │ │ \n",
- " │ \u001b[3m Cluster Resources \u001b[0m │ \n",
- " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
- " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n",
- " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ │ \u001b[36m \u001b[0m\u001b[36m1 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m16G~16G \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m8 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m4 \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n",
- " ╰────────────────────────────────────────────────────────────────╯ \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"cluster.details()"
]
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"id": "8ac46c87-70f1-4c70-9648-881151665355",
"metadata": {},
"outputs": [],
@@ -319,18 +211,10 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "4c458589-5a17-47c6-a8db-625427ae4fe7",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Ray cluster is up and running: True\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"#before proceeding make sure the cluster exists and the uri is not empty\n",
"assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n",
@@ -377,7 +261,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"id": "e69994b4-1a13-43fe-b698-2a5374cb941b",
"metadata": {},
"outputs": [],
@@ -473,972 +357,10 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "7f0985e9-5e88-4d36-ab38-c3001c13f97c",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Downloading builder script: 100%|██████████| 4.31k/4.31k [00:00<00:00, 5.60MB/s]\n",
- "Downloading metadata: 100%|██████████| 2.17k/2.17k [00:00<00:00, 3.13MB/s]\n",
- "Downloading readme: 100%|██████████| 7.59k/7.59k [00:00<00:00, 9.75MB/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Downloading and preparing dataset imdb/plain_text to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Downloading data: 0%| | 0.00/84.1M [00:00, ?B/s]\n",
- "Downloading data: 0%| | 30.7k/84.1M [00:00<05:22, 261kB/s]\n",
- "Downloading data: 0%| | 89.1k/84.1M [00:00<03:31, 397kB/s]\n",
- "Downloading data: 0%| | 184k/84.1M [00:00<02:24, 582kB/s] \n",
- "Downloading data: 0%| | 373k/84.1M [00:00<01:25, 981kB/s]\n",
- "Downloading data: 1%| | 778k/84.1M [00:00<00:44, 1.86MB/s]\n",
- "Downloading data: 2%|▏ | 1.34M/84.1M [00:00<00:29, 2.83MB/s]\n",
- "Downloading data: 2%|▏ | 2.02M/84.1M [00:00<00:21, 3.79MB/s]\n",
- "Downloading data: 3%|▎ | 2.86M/84.1M [00:00<00:16, 4.85MB/s]\n",
- "Downloading data: 5%|▍ | 3.98M/84.1M [00:01<00:12, 6.27MB/s]\n",
- "Downloading data: 6%|▋ | 5.39M/84.1M [00:01<00:09, 8.02MB/s]\n",
- "Downloading data: 9%|▉ | 7.69M/84.1M [00:01<00:06, 11.8MB/s]\n",
- "Downloading data: 13%|█▎ | 11.2M/84.1M [00:01<00:04, 17.4MB/s]\n",
- "Downloading data: 18%|█▊ | 15.3M/84.1M [00:01<00:03, 22.5MB/s]\n",
- "Downloading data: 23%|██▎ | 19.7M/84.1M [00:01<00:02, 28.5MB/s]\n",
- "Downloading data: 27%|██▋ | 23.1M/84.1M [00:01<00:02, 29.9MB/s]\n",
- "Downloading data: 31%|███▏ | 26.4M/84.1M [00:01<00:01, 30.7MB/s]\n",
- "Downloading data: 37%|███▋ | 30.7M/84.1M [00:01<00:01, 34.5MB/s]\n",
- "Downloading data: 42%|████▏ | 35.5M/84.1M [00:02<00:01, 38.4MB/s]\n",
- "Downloading data: 47%|████▋ | 39.4M/84.1M [00:02<00:01, 38.6MB/s]\n",
- "Downloading data: 52%|█████▏ | 43.6M/84.1M [00:02<00:01, 39.6MB/s]\n",
- "Downloading data: 58%|█████▊ | 48.7M/84.1M [00:02<00:00, 42.8MB/s]\n",
- "Downloading data: 63%|██████▎ | 53.0M/84.1M [00:02<00:00, 42.7MB/s]\n",
- "Downloading data: 68%|██████▊ | 57.3M/84.1M [00:02<00:00, 42.9MB/s]\n",
- "Downloading data: 74%|███████▎ | 62.0M/84.1M [00:02<00:00, 43.2MB/s]\n",
- "Downloading data: 80%|███████▉ | 67.3M/84.1M [00:02<00:00, 46.0MB/s]\n",
- "Downloading data: 85%|████████▌ | 71.9M/84.1M [00:02<00:00, 45.5MB/s]\n",
- "Downloading data: 91%|█████████ | 76.5M/84.1M [00:02<00:00, 45.3MB/s]\n",
- "Downloading data: 100%|██████████| 84.1M/84.1M [00:03<00:00, 27.2MB/s]\n",
- "Generating train split: 0%| | 0/25000 [00:00, ? examples/s]\n",
- "Generating train split: 0%| | 1/25000 [00:02<15:40:26, 2.26s/ examples]\n",
- "Generating train split: 3%|▎ | 749/25000 [00:02<00:54, 445.61 examples/s]\n",
- "Generating train split: 6%|▌ | 1514/25000 [00:02<00:23, 1010.13 examples/s]\n",
- "Generating train split: 9%|▉ | 2285/25000 [00:02<00:13, 1684.51 examples/s]\n",
- "Generating train split: 12%|█▏ | 3061/25000 [00:02<00:08, 2447.86 examples/s]\n",
- "Generating train split: 15%|█▌ | 3836/25000 [00:02<00:06, 3254.59 examples/s]\n",
- "Generating train split: 18%|█▊ | 4596/25000 [00:02<00:05, 4030.18 examples/s]\n",
- "Generating train split: 21%|██▏ | 5368/25000 [00:02<00:04, 4780.15 examples/s]\n",
- "Generating train split: 25%|██▍ | 6132/25000 [00:03<00:03, 5423.09 examples/s]\n",
- "Generating train split: 28%|██▊ | 6913/25000 [00:03<00:03, 6002.61 examples/s]\n",
- "Generating train split: 31%|███ | 7683/25000 [00:03<00:02, 6439.96 examples/s]\n",
- "Generating train split: 34%|███▍ | 8448/25000 [00:03<00:02, 6757.88 examples/s]\n",
- "Generating train split: 37%|███▋ | 9219/25000 [00:03<00:02, 7020.62 examples/s]\n",
- "Generating train split: 40%|████ | 10000/25000 [00:03<00:02, 6475.62 examples/s]\n",
- "Generating train split: 43%|████▎ | 10774/25000 [00:03<00:02, 6811.79 examples/s]\n",
- "Generating train split: 46%|████▌ | 11561/25000 [00:03<00:01, 7102.16 examples/s]\n",
- "Generating train split: 49%|████▉ | 12348/25000 [00:03<00:01, 7317.96 examples/s]\n",
- "Generating train split: 53%|█████▎ | 13143/25000 [00:04<00:01, 7498.06 examples/s]\n",
- "Generating train split: 56%|█████▌ | 13919/25000 [00:04<00:01, 7573.16 examples/s]\n",
- "Generating train split: 59%|█████▉ | 14707/25000 [00:04<00:01, 7659.25 examples/s]\n",
- "Generating train split: 62%|██████▏ | 15494/25000 [00:04<00:01, 7721.19 examples/s]\n",
- "Generating train split: 65%|██████▌ | 16273/25000 [00:04<00:01, 7739.71 examples/s]\n",
- "Generating train split: 68%|██████▊ | 17056/25000 [00:04<00:01, 7765.06 examples/s]\n",
- "Generating train split: 71%|███████▏ | 17839/25000 [00:04<00:00, 7783.64 examples/s]\n",
- "Generating train split: 75%|███████▍ | 18628/25000 [00:04<00:00, 7813.80 examples/s]\n",
- "Generating train split: 78%|███████▊ | 19411/25000 [00:04<00:00, 7804.00 examples/s]\n",
- "Generating train split: 81%|████████ | 20193/25000 [00:04<00:00, 7072.48 examples/s]\n",
- "Generating train split: 84%|████████▍ | 20967/25000 [00:05<00:00, 7257.29 examples/s]\n",
- "Generating train split: 87%|████████▋ | 21749/25000 [00:05<00:00, 7416.22 examples/s]\n",
- "Generating train split: 90%|█████████ | 22540/25000 [00:05<00:00, 7556.79 examples/s]\n",
- "Generating train split: 93%|█████████▎| 23327/25000 [00:05<00:00, 7646.65 examples/s]\n",
- "Generating train split: 96%|█████████▋| 24108/25000 [00:05<00:00, 7694.25 examples/s]\n",
- "Generating train split: 100%|█████████▉| 24881/25000 [00:05<00:00, 7674.07 examples/s]\n",
- " \n",
- "Generating test split: 0%| | 0/25000 [00:00, ? examples/s]\n",
- "Generating test split: 0%| | 1/25000 [00:00<3:10:01, 2.19 examples/s]\n",
- "Generating test split: 3%|▎ | 766/25000 [00:00<00:13, 1826.18 examples/s]\n",
- "Generating test split: 6%|▌ | 1544/25000 [00:00<00:07, 3338.57 examples/s]\n",
- "Generating test split: 9%|▉ | 2332/25000 [00:00<00:04, 4546.72 examples/s]\n",
- "Generating test split: 12%|█▏ | 3116/25000 [00:00<00:04, 5453.13 examples/s]\n",
- "Generating test split: 16%|█▌ | 3905/25000 [00:00<00:03, 6140.31 examples/s]\n",
- "Generating test split: 19%|█▉ | 4688/25000 [00:01<00:03, 6624.84 examples/s]\n",
- "Generating test split: 22%|██▏ | 5474/25000 [00:01<00:02, 6982.92 examples/s]\n",
- "Generating test split: 25%|██▌ | 6257/25000 [00:01<00:02, 7230.53 examples/s]\n",
- "Generating test split: 28%|██▊ | 7046/25000 [00:01<00:02, 7423.93 examples/s]\n",
- "Generating test split: 31%|███▏ | 7829/25000 [00:01<00:02, 7543.45 examples/s]\n",
- "Generating test split: 34%|███▍ | 8614/25000 [00:01<00:02, 7633.64 examples/s]\n",
- "Generating test split: 38%|███▊ | 9394/25000 [00:01<00:02, 7680.82 examples/s]\n",
- "Generating test split: 41%|████ | 10174/25000 [00:01<00:02, 7005.77 examples/s]\n",
- "Generating test split: 44%|████▍ | 10949/25000 [00:01<00:01, 7213.25 examples/s]\n",
- "Generating test split: 47%|████▋ | 11730/25000 [00:01<00:01, 7381.64 examples/s]\n",
- "Generating test split: 50%|█████ | 12515/25000 [00:02<00:01, 7516.21 examples/s]\n",
- "Generating test split: 53%|█████▎ | 13301/25000 [00:02<00:01, 7615.13 examples/s]\n",
- "Generating test split: 56%|█████▋ | 14087/25000 [00:02<00:01, 7684.18 examples/s]\n",
- "Generating test split: 60%|█████▉ | 14876/25000 [00:02<00:01, 7744.99 examples/s]\n",
- "Generating test split: 63%|██████▎ | 15664/25000 [00:02<00:01, 7779.58 examples/s]\n",
- "Generating test split: 66%|██████▌ | 16456/25000 [00:02<00:01, 7817.82 examples/s]\n",
- "Generating test split: 69%|██████▉ | 17240/25000 [00:02<00:00, 7819.40 examples/s]\n",
- "Generating test split: 72%|███████▏ | 18024/25000 [00:02<00:00, 7816.77 examples/s]\n",
- "Generating test split: 75%|███████▌ | 18808/25000 [00:02<00:00, 7823.24 examples/s]\n",
- "Generating test split: 78%|███████▊ | 19593/25000 [00:02<00:00, 7829.38 examples/s]\n",
- "Generating test split: 82%|████████▏ | 20377/25000 [00:03<00:00, 7091.67 examples/s]\n",
- "Generating test split: 85%|████████▍ | 21155/25000 [00:03<00:00, 7283.38 examples/s]\n",
- "Generating test split: 88%|████████▊ | 21937/25000 [00:03<00:00, 7434.19 examples/s]\n",
- "Generating test split: 91%|█████████ | 22724/25000 [00:03<00:00, 7560.18 examples/s]\n",
- "Generating test split: 94%|█████████▍| 23514/25000 [00:03<00:00, 7658.45 examples/s]\n",
- "Generating test split: 97%|█████████▋| 24285/25000 [00:03<00:00, 7624.86 examples/s]\n",
- "Generating unsupervised split: 0%| | 0/50000 [00:00, ? examples/s] \n",
- "Generating unsupervised split: 0%| | 1/50000 [00:04<56:42:02, 4.08s/ examples]\n",
- "Generating unsupervised split: 2%|▏ | 771/50000 [00:04<03:08, 260.56 examples/s]\n",
- "Generating unsupervised split: 3%|▎ | 1525/50000 [00:04<01:21, 595.91 examples/s]\n",
- "Generating unsupervised split: 5%|▍ | 2297/50000 [00:04<00:46, 1035.82 examples/s]\n",
- "Generating unsupervised split: 6%|▌ | 3065/50000 [00:04<00:29, 1574.39 examples/s]\n",
- "Generating unsupervised split: 8%|▊ | 3795/50000 [00:04<00:21, 2168.20 examples/s]\n",
- "Generating unsupervised split: 9%|▉ | 4564/50000 [00:04<00:15, 2880.92 examples/s]\n",
- "Generating unsupervised split: 11%|█ | 5351/50000 [00:04<00:12, 3657.19 examples/s]\n",
- "Generating unsupervised split: 12%|█▏ | 6099/50000 [00:04<00:11, 3947.54 examples/s]\n",
- "Generating unsupervised split: 14%|█▎ | 6874/50000 [00:05<00:09, 4672.28 examples/s]\n",
- "Generating unsupervised split: 15%|█▌ | 7653/50000 [00:05<00:07, 5336.59 examples/s]\n",
- "Generating unsupervised split: 17%|█▋ | 8430/50000 [00:05<00:07, 5905.86 examples/s]\n",
- "Generating unsupervised split: 18%|█▊ | 9206/50000 [00:05<00:06, 6366.96 examples/s]\n",
- "Generating unsupervised split: 20%|█▉ | 9971/50000 [00:05<00:05, 6703.01 examples/s]\n",
- "Generating unsupervised split: 21%|██▏ | 10725/50000 [00:05<00:06, 6205.86 examples/s]\n",
- "Generating unsupervised split: 23%|██▎ | 11505/50000 [00:05<00:05, 6617.62 examples/s]\n",
- "Generating unsupervised split: 25%|██▍ | 12291/50000 [00:05<00:05, 6952.09 examples/s]\n",
- "Generating unsupervised split: 26%|██▌ | 13080/50000 [00:05<00:05, 7212.23 examples/s]\n",
- "Generating unsupervised split: 28%|██▊ | 13852/50000 [00:05<00:04, 7356.03 examples/s]\n",
- "Generating unsupervised split: 29%|██▉ | 14628/50000 [00:06<00:04, 7471.67 examples/s]\n",
- "Generating unsupervised split: 31%|███ | 15399/50000 [00:06<00:04, 7539.92 examples/s]\n",
- "Generating unsupervised split: 32%|███▏ | 16181/50000 [00:06<00:04, 7619.82 examples/s]\n",
- "Generating unsupervised split: 34%|███▍ | 16967/50000 [00:06<00:04, 7690.56 examples/s]\n",
- "Generating unsupervised split: 36%|███▌ | 17753/50000 [00:06<00:04, 7738.38 examples/s]\n",
- "Generating unsupervised split: 37%|███▋ | 18531/50000 [00:06<00:04, 7742.28 examples/s]\n",
- "Generating unsupervised split: 39%|███▊ | 19311/50000 [00:06<00:03, 7754.75 examples/s]\n",
- "Generating unsupervised split: 40%|████ | 20089/50000 [00:06<00:04, 7009.65 examples/s]\n",
- "Generating unsupervised split: 42%|████▏ | 20862/50000 [00:06<00:04, 7208.36 examples/s]\n",
- "Generating unsupervised split: 43%|████▎ | 21633/50000 [00:07<00:03, 7348.02 examples/s]\n",
- "Generating unsupervised split: 45%|████▍ | 22408/50000 [00:07<00:03, 7463.78 examples/s]\n",
- "Generating unsupervised split: 46%|████▋ | 23192/50000 [00:07<00:03, 7573.20 examples/s]\n",
- "Generating unsupervised split: 48%|████▊ | 23971/50000 [00:07<00:03, 7636.86 examples/s]\n",
- "Generating unsupervised split: 49%|████▉ | 24739/50000 [00:07<00:03, 7621.64 examples/s]\n",
- "Generating unsupervised split: 51%|█████ | 25517/50000 [00:07<00:03, 7667.30 examples/s]\n",
- "Generating unsupervised split: 53%|█████▎ | 26286/50000 [00:07<00:03, 7661.28 examples/s]\n",
- "Generating unsupervised split: 54%|█████▍ | 27058/50000 [00:07<00:02, 7675.99 examples/s]\n",
- "Generating unsupervised split: 56%|█████▌ | 27848/50000 [00:07<00:02, 7740.61 examples/s]\n",
- "Generating unsupervised split: 57%|█████▋ | 28629/50000 [00:07<00:02, 7756.47 examples/s]\n",
- "Generating unsupervised split: 59%|█████▉ | 29411/50000 [00:08<00:02, 7774.99 examples/s]\n",
- "Generating unsupervised split: 60%|██████ | 30189/50000 [00:08<00:02, 6960.63 examples/s]\n",
- "Generating unsupervised split: 62%|██████▏ | 30922/50000 [00:08<00:02, 7061.87 examples/s]\n",
- "Generating unsupervised split: 63%|██████▎ | 31660/50000 [00:08<00:02, 7149.84 examples/s]\n",
- "Generating unsupervised split: 65%|██████▍ | 32440/50000 [00:08<00:02, 7335.25 examples/s]\n",
- "Generating unsupervised split: 66%|██████▋ | 33224/50000 [00:08<00:02, 7481.89 examples/s]\n",
- "Generating unsupervised split: 68%|██████▊ | 34007/50000 [00:08<00:02, 7582.15 examples/s]\n",
- "Generating unsupervised split: 70%|██████▉ | 34795/50000 [00:08<00:01, 7669.23 examples/s]\n",
- "Generating unsupervised split: 71%|███████ | 35582/50000 [00:08<00:01, 7728.71 examples/s]\n",
- "Generating unsupervised split: 73%|███████▎ | 36366/50000 [00:08<00:01, 7759.63 examples/s]\n",
- "Generating unsupervised split: 74%|███████▍ | 37151/50000 [00:09<00:01, 7784.00 examples/s]\n",
- "Generating unsupervised split: 76%|███████▌ | 37935/50000 [00:09<00:01, 7798.60 examples/s]\n",
- "Generating unsupervised split: 77%|███████▋ | 38719/50000 [00:09<00:01, 7808.77 examples/s]\n",
- "Generating unsupervised split: 79%|███████▉ | 39501/50000 [00:09<00:01, 7745.30 examples/s]\n",
- "Generating unsupervised split: 81%|████████ | 40277/50000 [00:09<00:01, 6887.20 examples/s]\n",
- "Generating unsupervised split: 82%|████████▏ | 41043/50000 [00:09<00:01, 7098.58 examples/s]\n",
- "Generating unsupervised split: 84%|████████▎ | 41820/50000 [00:09<00:01, 7285.74 examples/s]\n",
- "Generating unsupervised split: 85%|████████▌ | 42600/50000 [00:09<00:00, 7433.09 examples/s]\n",
- "Generating unsupervised split: 87%|████████▋ | 43379/50000 [00:09<00:00, 7533.84 examples/s]\n",
- "Generating unsupervised split: 88%|████████▊ | 44161/50000 [00:10<00:00, 7616.56 examples/s]\n",
- "Generating unsupervised split: 90%|████████▉ | 44939/50000 [00:10<00:00, 7663.82 examples/s]\n",
- "Generating unsupervised split: 91%|█████████▏| 45719/50000 [00:10<00:00, 7703.42 examples/s]\n",
- "Generating unsupervised split: 93%|█████████▎| 46507/50000 [00:10<00:00, 7753.52 examples/s]\n",
- "Generating unsupervised split: 95%|█████████▍| 47285/50000 [00:10<00:00, 7758.62 examples/s]\n",
- "Generating unsupervised split: 96%|█████████▌| 48063/50000 [00:10<00:00, 7751.50 examples/s]\n",
- "Generating unsupervised split: 98%|█████████▊| 48854/50000 [00:10<00:00, 7796.96 examples/s]\n",
- "Generating unsupervised split: 99%|█████████▉| 49641/50000 [00:10<00:00, 7818.59 examples/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Dataset imdb downloaded and prepared to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1. Subsequent calls will reuse this data.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 3/3 [00:00<00:00, 696.30it/s] \n",
- "Downloading: 100%|██████████| 28.0/28.0 [00:00<00:00, 32.1kB/s]\n",
- "Downloading: 100%|██████████| 483/483 [00:00<00:00, 600kB/s]\n",
- "Downloading: 0%| | 0.00/232k [00:00, ?B/s]\n",
- "Downloading: 100%|██████████| 232k/232k [00:00<00:00, 4.80MB/s]\n",
- "Downloading: 0%| | 0.00/466k [00:00, ?B/s]\n",
- "Downloading: 100%|██████████| 466k/466k [00:00<00:00, 7.88MB/s]\n",
- " 0%| | 0/25 [00:00, ?ba/s]\n",
- " 4%|▍ | 1/25 [00:00<00:15, 1.52ba/s]\n",
- " 8%|▊ | 2/25 [00:01<00:14, 1.57ba/s]\n",
- " 12%|█▏ | 3/25 [00:01<00:13, 1.59ba/s]\n",
- " 16%|█▌ | 4/25 [00:02<00:13, 1.59ba/s]\n",
- " 20%|██ | 5/25 [00:03<00:13, 1.52ba/s]\n",
- " 24%|██▍ | 6/25 [00:03<00:12, 1.54ba/s]\n",
- " 28%|██▊ | 7/25 [00:04<00:11, 1.55ba/s]\n",
- " 32%|███▏ | 8/25 [00:05<00:11, 1.53ba/s]\n",
- " 36%|███▌ | 9/25 [00:05<00:10, 1.54ba/s]\n",
- " 40%|████ | 10/25 [00:06<00:09, 1.54ba/s]\n",
- " 44%|████▍ | 11/25 [00:07<00:08, 1.58ba/s]\n",
- " 48%|████▊ | 12/25 [00:07<00:08, 1.58ba/s]\n",
- " 52%|█████▏ | 13/25 [00:08<00:07, 1.60ba/s]\n",
- " 56%|█████▌ | 14/25 [00:08<00:07, 1.57ba/s]\n",
- " 60%|██████ | 15/25 [00:09<00:06, 1.57ba/s]\n",
- " 64%|██████▍ | 16/25 [00:10<00:05, 1.55ba/s]\n",
- " 68%|██████▊ | 17/25 [00:10<00:05, 1.55ba/s]\n",
- " 72%|███████▏ | 18/25 [00:11<00:04, 1.56ba/s]\n",
- " 76%|███████▌ | 19/25 [00:12<00:03, 1.56ba/s]\n",
- " 80%|████████ | 20/25 [00:12<00:03, 1.55ba/s]\n",
- " 84%|████████▍ | 21/25 [00:13<00:02, 1.50ba/s]\n",
- " 88%|████████▊ | 22/25 [00:14<00:01, 1.51ba/s]\n",
- " 92%|█████████▏| 23/25 [00:14<00:01, 1.55ba/s]\n",
- " 96%|█████████▌| 24/25 [00:15<00:00, 1.55ba/s]\n",
- " 96%|█████████▌| 24/25 [00:16<00:00, 1.49ba/s]\n",
- " 0%| | 0/25 [00:00, ?ba/s]\n",
- " 4%|▍ | 1/25 [00:00<00:14, 1.67ba/s]\n",
- " 8%|▊ | 2/25 [00:01<00:13, 1.67ba/s]\n",
- " 12%|█▏ | 3/25 [00:01<00:13, 1.64ba/s]\n",
- " 16%|█▌ | 4/25 [00:02<00:12, 1.64ba/s]\n",
- " 20%|██ | 5/25 [00:03<00:12, 1.63ba/s]\n",
- " 24%|██▍ | 6/25 [00:03<00:11, 1.61ba/s]\n",
- " 28%|██▊ | 7/25 [00:04<00:11, 1.62ba/s]\n",
- " 32%|███▏ | 8/25 [00:04<00:10, 1.61ba/s]\n",
- " 36%|███▌ | 9/25 [00:05<00:10, 1.58ba/s]\n",
- " 40%|████ | 10/25 [00:06<00:09, 1.58ba/s]\n",
- " 44%|████▍ | 11/25 [00:06<00:08, 1.60ba/s]\n",
- " 48%|████▊ | 12/25 [00:07<00:08, 1.59ba/s]\n",
- " 52%|█████▏ | 13/25 [00:08<00:07, 1.55ba/s]\n",
- " 56%|█████▌ | 14/25 [00:08<00:06, 1.58ba/s]\n",
- " 60%|██████ | 15/25 [00:09<00:06, 1.58ba/s]\n",
- " 64%|██████▍ | 16/25 [00:10<00:05, 1.57ba/s]\n",
- " 68%|██████▊ | 17/25 [00:10<00:05, 1.56ba/s]\n",
- " 72%|███████▏ | 18/25 [00:11<00:04, 1.56ba/s]\n",
- " 76%|███████▌ | 19/25 [00:11<00:03, 1.57ba/s]\n",
- " 80%|████████ | 20/25 [00:12<00:03, 1.60ba/s]\n",
- " 84%|████████▍ | 21/25 [00:13<00:02, 1.61ba/s]\n",
- " 88%|████████▊ | 22/25 [00:13<00:01, 1.58ba/s]\n",
- " 92%|█████████▏| 23/25 [00:14<00:01, 1.58ba/s]\n",
- " 96%|█████████▌| 24/25 [00:15<00:00, 1.58ba/s]\n",
- " 96%|█████████▌| 24/25 [00:15<00:00, 1.53ba/s]\n",
- " 0%| | 0/50 [00:00, ?ba/s]\n",
- " 2%|▏ | 1/50 [00:00<00:29, 1.68ba/s]\n",
- " 4%|▍ | 2/50 [00:01<00:29, 1.63ba/s]\n",
- " 6%|▌ | 3/50 [00:01<00:29, 1.58ba/s]\n",
- " 8%|▊ | 4/50 [00:02<00:30, 1.51ba/s]\n",
- " 10%|█ | 5/50 [00:03<00:29, 1.51ba/s]\n",
- " 12%|█▏ | 6/50 [00:03<00:28, 1.56ba/s]\n",
- " 14%|█▍ | 7/50 [00:04<00:27, 1.54ba/s]\n",
- " 16%|█▌ | 8/50 [00:05<00:27, 1.53ba/s]\n",
- " 18%|█▊ | 9/50 [00:05<00:26, 1.56ba/s]\n",
- " 20%|██ | 10/50 [00:06<00:25, 1.55ba/s]\n",
- " 22%|██▏ | 11/50 [00:07<00:25, 1.55ba/s]\n",
- " 24%|██▍ | 12/50 [00:07<00:24, 1.56ba/s]\n",
- " 26%|██▌ | 13/50 [00:08<00:23, 1.57ba/s]\n",
- " 28%|██▊ | 14/50 [00:08<00:22, 1.57ba/s]\n",
- " 30%|███ | 15/50 [00:09<00:22, 1.55ba/s]\n",
- " 32%|███▏ | 16/50 [00:10<00:21, 1.55ba/s]\n",
- " 34%|███▍ | 17/50 [00:10<00:21, 1.56ba/s]\n",
- " 36%|███▌ | 18/50 [00:11<00:20, 1.56ba/s]\n",
- " 38%|███▊ | 19/50 [00:12<00:19, 1.56ba/s]\n",
- " 40%|████ | 20/50 [00:12<00:19, 1.56ba/s]\n",
- " 42%|████▏ | 21/50 [00:13<00:18, 1.53ba/s]\n",
- " 44%|████▍ | 22/50 [00:14<00:18, 1.55ba/s]\n",
- " 46%|████▌ | 23/50 [00:14<00:17, 1.54ba/s]\n",
- " 48%|████▊ | 24/50 [00:15<00:16, 1.56ba/s]\n",
- " 50%|█████ | 25/50 [00:16<00:15, 1.56ba/s]\n",
- " 52%|█████▏ | 26/50 [00:16<00:15, 1.57ba/s]\n",
- " 54%|█████▍ | 27/50 [00:17<00:14, 1.55ba/s]\n",
- " 56%|█████▌ | 28/50 [00:17<00:13, 1.60ba/s]\n",
- " 58%|█████▊ | 29/50 [00:18<00:13, 1.57ba/s]\n",
- " 60%|██████ | 30/50 [00:19<00:12, 1.59ba/s]\n",
- " 62%|██████▏ | 31/50 [00:19<00:12, 1.55ba/s]\n",
- " 64%|██████▍ | 32/50 [00:20<00:11, 1.55ba/s]\n",
- " 66%|██████▌ | 33/50 [00:21<00:10, 1.56ba/s]\n",
- " 68%|██████▊ | 34/50 [00:21<00:10, 1.58ba/s]\n",
- " 70%|███████ | 35/50 [00:22<00:09, 1.61ba/s]\n",
- " 72%|███████▏ | 36/50 [00:23<00:08, 1.60ba/s]\n",
- " 74%|███████▍ | 37/50 [00:23<00:08, 1.54ba/s]\n",
- " 76%|███████▌ | 38/50 [00:24<00:07, 1.56ba/s]\n",
- " 78%|███████▊ | 39/50 [00:24<00:07, 1.56ba/s]\n",
- " 80%|████████ | 40/50 [00:25<00:06, 1.57ba/s]\n",
- " 82%|████████▏ | 41/50 [00:26<00:05, 1.54ba/s]\n",
- " 84%|████████▍ | 42/50 [00:26<00:05, 1.55ba/s]\n",
- " 86%|████████▌ | 43/50 [00:27<00:04, 1.55ba/s]\n",
- " 88%|████████▊ | 44/50 [00:28<00:03, 1.54ba/s]\n",
- " 90%|█████████ | 45/50 [00:28<00:03, 1.56ba/s]\n",
- " 92%|█████████▏| 46/50 [00:29<00:02, 1.56ba/s]\n",
- " 94%|█████████▍| 47/50 [00:30<00:01, 1.54ba/s]\n",
- " 96%|█████████▌| 48/50 [00:30<00:01, 1.54ba/s]\n",
- " 98%|█████████▊| 49/50 [00:31<00:00, 1.54ba/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m len of train Dataset({\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m features: ['text', 'label', 'input_ids', 'attention_mask'],\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m num_rows: 100\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m }) and test Dataset({\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m features: ['text', 'label', 'input_ids', 'attention_mask'],\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m num_rows: 100\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m })\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- " 98%|█████████▊| 49/50 [00:32<00:00, 1.53ba/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m To disable this warning, you can either:\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \t- Avoid using `tokenizers` before the fork if possible\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:55:58 (running for 00:00:05.07)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 6.4/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m 2022-11-04 07:56:02,047\tINFO torch.py:346 -- Setting up process group for: env:// [rank=0, world_size=4]\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m 2022-11-04 07:56:02,045\tINFO torch.py:346 -- Setting up process group for: env:// [rank=2, world_size=4]\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m 2022-11-04 07:56:02,047\tINFO torch.py:346 -- Setting up process group for: env:// [rank=1, world_size=4]\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m 2022-11-04 07:56:02,048\tINFO torch.py:346 -- Setting up process group for: env:// [rank=3, world_size=4]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:03 (running for 00:00:10.07)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 7.2/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Downloading: 100%|██████████| 483/483 [00:00<00:00, 588kB/s]\n",
- "Downloading: 0%| | 0.00/268M [00:00, ?B/s] \n",
- "Downloading: 0%| | 893k/268M [00:00<00:29, 8.93MB/s]\n",
- "Downloading: 3%|▎ | 6.70M/268M [00:00<00:06, 37.8MB/s]\n",
- "Downloading: 5%|▍ | 12.9M/268M [00:00<00:05, 48.7MB/s]\n",
- "Downloading: 7%|▋ | 19.2M/268M [00:00<00:04, 54.4MB/s]\n",
- "Downloading: 10%|▉ | 25.7M/268M [00:00<00:04, 58.3MB/s]\n",
- "Downloading: 12%|█▏ | 32.3M/268M [00:00<00:03, 60.8MB/s]\n",
- "Downloading: 14%|█▍ | 38.8M/268M [00:00<00:03, 62.1MB/s]\n",
- "Downloading: 17%|█▋ | 45.3M/268M [00:00<00:03, 63.3MB/s]\n",
- "Downloading: 19%|█▉ | 51.8M/268M [00:00<00:03, 63.7MB/s]\n",
- "Downloading: 22%|██▏ | 58.4M/268M [00:01<00:03, 64.3MB/s]\n",
- "Downloading: 24%|██▍ | 64.9M/268M [00:01<00:03, 64.7MB/s]\n",
- "Downloading: 27%|██▋ | 71.5M/268M [00:01<00:03, 65.2MB/s]\n",
- "Downloading: 29%|██▉ | 78.1M/268M [00:01<00:02, 65.1MB/s]\n",
- "Downloading: 32%|███▏ | 84.6M/268M [00:01<00:02, 65.1MB/s]\n",
- "Downloading: 34%|███▍ | 91.2M/268M [00:01<00:02, 65.3MB/s]\n",
- "Downloading: 36%|███▋ | 97.7M/268M [00:01<00:02, 65.3MB/s]\n",
- "Downloading: 39%|███▉ | 104M/268M [00:01<00:02, 65.4MB/s] \n",
- "Downloading: 41%|████▏ | 111M/268M [00:01<00:02, 65.5MB/s]\n",
- "Downloading: 44%|████▍ | 117M/268M [00:01<00:02, 65.5MB/s]\n",
- "Downloading: 46%|████▋ | 124M/268M [00:02<00:02, 65.4MB/s]\n",
- "Downloading: 49%|████▊ | 130M/268M [00:02<00:02, 65.4MB/s]\n",
- "Downloading: 51%|█████ | 137M/268M [00:02<00:01, 65.5MB/s]\n",
- "Downloading: 54%|█████▎ | 144M/268M [00:02<00:01, 65.5MB/s]\n",
- "Downloading: 56%|█████▌ | 150M/268M [00:02<00:01, 65.4MB/s]\n",
- "Downloading: 58%|█████▊ | 157M/268M [00:02<00:01, 65.4MB/s]\n",
- "Downloading: 61%|██████ | 163M/268M [00:02<00:01, 65.6MB/s]\n",
- "Downloading: 63%|██████▎ | 170M/268M [00:02<00:01, 65.4MB/s]\n",
- "Downloading: 66%|██████▌ | 176M/268M [00:02<00:01, 65.4MB/s]\n",
- "Downloading: 68%|██████▊ | 183M/268M [00:02<00:01, 65.4MB/s]\n",
- "Downloading: 71%|███████ | 190M/268M [00:03<00:01, 65.6MB/s]\n",
- "Downloading: 73%|███████▎ | 196M/268M [00:03<00:01, 65.6MB/s]\n",
- "Downloading: 76%|███████▌ | 203M/268M [00:03<00:00, 65.7MB/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:08 (running for 00:00:15.07)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 7.5/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Downloading: 78%|███████▊ | 209M/268M [00:03<00:00, 65.7MB/s]\n",
- "Downloading: 81%|████████ | 216M/268M [00:03<00:00, 65.7MB/s]\n",
- "Downloading: 83%|████████▎ | 223M/268M [00:03<00:00, 66.0MB/s]\n",
- "Downloading: 86%|████████▌ | 229M/268M [00:03<00:00, 66.0MB/s]\n",
- "Downloading: 88%|████████▊ | 236M/268M [00:03<00:00, 65.8MB/s]\n",
- "Downloading: 90%|█████████ | 242M/268M [00:03<00:00, 65.8MB/s]\n",
- "Downloading: 93%|█████████▎| 249M/268M [00:03<00:00, 65.7MB/s]\n",
- "Downloading: 95%|█████████▌| 255M/268M [00:04<00:00, 65.7MB/s]\n",
- "Downloading: 98%|█████████▊| 262M/268M [00:04<00:00, 65.8MB/s]\n",
- "Downloading: 100%|██████████| 268M/268M [00:04<00:00, 63.9MB/s]\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.weight']\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight']\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight']\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight']\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight']\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias']\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m /tmp/ray/session_2022-11-04_07-51-23_507232_7/runtime_resources/pip/4a36d7bd0bbff8fccea52f9c0d942dd63707933f/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m warnings.warn(\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m /tmp/ray/session_2022-11-04_07-51-23_507232_7/runtime_resources/pip/4a36d7bd0bbff8fccea52f9c0d942dd63707933f/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m warnings.warn(\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m /tmp/ray/session_2022-11-04_07-51-23_507232_7/runtime_resources/pip/4a36d7bd0bbff8fccea52f9c0d942dd63707933f/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m warnings.warn(\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m /tmp/ray/session_2022-11-04_07-51-23_507232_7/runtime_resources/pip/4a36d7bd0bbff8fccea52f9c0d942dd63707933f/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m warnings.warn(\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m ***** Running training *****\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Num examples = 6250\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Num Epochs = 1\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Instantaneous batch size per device = 16\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Total train batch size (w. parallel, distributed & accumulation) = 64\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Gradient Accumulation steps = 1\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Total optimization steps = 391\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Number of trainable parameters = 66955010\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:13 (running for 00:00:20.08)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 12.3/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m [W reducer.cpp:1251] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m [W reducer.cpp:1251] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m [W reducer.cpp:1251] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m [W reducer.cpp:1251] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:18 (running for 00:00:25.08)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:23 (running for 00:00:30.08)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:28 (running for 00:00:35.09)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:33 (running for 00:00:40.09)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:38 (running for 00:00:45.10)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:43 (running for 00:00:50.10)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:48 (running for 00:00:55.10)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:53 (running for 00:01:00.10)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:59 (running for 00:01:05.11)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:04 (running for 00:01:10.11)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:09 (running for 00:01:15.11)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:14 (running for 00:01:20.12)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:19 (running for 00:01:25.12)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:24 (running for 00:01:30.12)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:29 (running for 00:01:35.13)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:34 (running for 00:01:40.13)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:39 (running for 00:01:45.13)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:44 (running for 00:01:50.13)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:49 (running for 00:01:55.14)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:54 (running for 00:02:00.14)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:59 (running for 00:02:05.15)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Saving model checkpoint to /tmp/hf_imdb/test/checkpoint-391\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Configuration saved in /tmp/hf_imdb/test/checkpoint-391/config.json\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Model weights saved in /tmp/hf_imdb/test/checkpoint-391/pytorch_model.bin\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result for HuggingFaceTrainer_c7d60_00000:\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _time_this_iter_s: 118.07144260406494\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _timestamp: 1667573883\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _training_iteration: 1\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m date: 2022-11-04_07-58-03\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m done: false\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m epoch: 1.0\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m experiment_id: 7bc6ab25d0414fcbb589bcb5d0f29b99\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m hostname: hfgputest-worker-small-group-hfgputest-q4758\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m iterations_since_restore: 1\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m node_ip: 10.129.66.16\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m pid: 146\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m should_checkpoint: true\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m step: 391\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_since_restore: 124.55581378936768\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_this_iter_s: 124.55581378936768\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_total_s: 124.55581378936768\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m timestamp: 1667573883\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m timesteps_since_restore: 0\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_loss: 0.2760564701636429\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_runtime: 109.7668\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_samples_per_second: 56.939\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_steps_per_second: 3.562\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m training_iteration: 1\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m trial_id: c7d60_00000\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m warmup_time: 0.003995656967163086\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m {'train_runtime': 109.7668, 'train_samples_per_second': 56.939, 'train_steps_per_second': 3.562, 'train_loss': 0.2760564701636429, 'epoch': 1.0}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m \n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m \n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Training completed. Do not forget to share your model on huggingface.co/models =)\n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m \n",
- "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m \n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:58:13 (running for 00:02:19.36)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 16.0/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+--------+------------------+-----------------+----------------------------+--------------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc | iter | total time (s) | train_runtime | train_samples_per_second | train_steps_per_second |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------+--------+------------------+-----------------+----------------------------+--------------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 | 1 | 124.556 | 109.767 | 56.939 | 3.562 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+--------+------------------+-----------------+----------------------------+--------------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m 2022-11-04 07:58:13,248\tWARNING util.py:214 -- The `process_trial_save` operation took 9.709 s, which may be a performance bottleneck.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m 2022-11-04 07:58:13,248\tWARNING trial_runner.py:856 -- Consider turning off forced head-worker trial checkpoint syncs by setting sync_on_checkpoint=False. Note that this may result in faulty trial restoration if a failure occurs while the checkpoint is being synced from the worker to the head node.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result for HuggingFaceTrainer_c7d60_00000:\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _time_this_iter_s: 118.07144260406494\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _timestamp: 1667573883\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _training_iteration: 1\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m date: 2022-11-04_07-58-03\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m done: true\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m epoch: 1.0\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m experiment_id: 7bc6ab25d0414fcbb589bcb5d0f29b99\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m experiment_tag: '0'\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m hostname: hfgputest-worker-small-group-hfgputest-q4758\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m iterations_since_restore: 1\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m node_ip: 10.129.66.16\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m pid: 146\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m should_checkpoint: true\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m step: 391\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_since_restore: 124.55581378936768\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_this_iter_s: 124.55581378936768\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_total_s: 124.55581378936768\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m timestamp: 1667573883\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m timesteps_since_restore: 0\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_loss: 0.2760564701636429\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_runtime: 109.7668\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_samples_per_second: 56.939\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_steps_per_second: 3.562\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m training_iteration: 1\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m trial_id: c7d60_00000\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m warmup_time: 0.003995656967163086\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:58:16 (running for 00:02:22.40)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 9.1/240.1 GiB\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 0/10 CPUs, 0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 TERMINATED)\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+------------+------------------+--------+------------------+-----------------+----------------------------+--------------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc | iter | total time (s) | train_runtime | train_samples_per_second | train_steps_per_second |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+------------+------------------+--------+------------------+-----------------+----------------------------+--------------------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | TERMINATED | 10.129.66.16:146 | 1 | 124.556 | 109.767 | 56.939 | 3.562 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+------------+------------------+--------+------------------+-----------------+----------------------------+--------------------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m 2022-11-04 07:58:16,286\tWARNING util.py:214 -- The `process_trial_save` operation took 2.161 s, which may be a performance bottleneck.\n",
- "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m 2022-11-04 07:58:16,398\tINFO tune.py:747 -- Total run time: 142.70 seconds (142.40 seconds for the tuning loop).\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"#call the above cell as a remote ray function\n",
"ray.get(train_fn.remote())"
diff --git a/demo-notebooks/additional-demos/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb
index 1b62b988e..6c3aa2ac5 100644
--- a/demo-notebooks/additional-demos/local_interactive.ipynb
+++ b/demo-notebooks/additional-demos/local_interactive.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "9a44568b-61ef-41c7-8ad1-9a3b128f03a7",
"metadata": {
"tags": []
@@ -69,7 +69,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "69968140-15e6-482f-9529-82b0cd19524b",
"metadata": {
"tags": []
@@ -81,21 +81,12 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "e20f9982-f671-460b-8c22-3d62e101fed9",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Waiting for requested resources to be set up...\n",
- "Requested cluster up and running!\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"cluster.wait_ready()"
]
@@ -123,82 +114,12 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "9483bb98-33b3-4beb-9b15-163d7e76c1d7",
"metadata": {
- "scrolled": true,
"tags": []
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2023-06-27 19:14:16,088\tINFO client_builder.py:251 -- Passing the following kwargs to ray.init() on the server: logging_level\n",
- "2023-06-27 19:14:16,100\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.IDLE\n",
- "2023-06-27 19:14:16,308\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.CONNECTING\n",
- "2023-06-27 19:14:16,434\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.READY\n",
- "2023-06-27 19:14:16,436\tDEBUG worker.py:807 -- Pinging server.\n",
- "2023-06-27 19:14:18,634\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000001000000\n",
- "2023-06-27 19:14:18,635\tDEBUG worker.py:564 -- Scheduling task get_dashboard_url 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x01\\x00\\x00\\x00'\n",
- "2023-06-27 19:14:18,645\tDEBUG worker.py:640 -- Retaining c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n",
- "2023-06-27 19:14:19,454\tDEBUG worker.py:636 -- Releasing c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "
\n",
- "
Ray
\n",
- "
\n",
- "
\n",
- " \n",
- " Python version: | \n",
- " 3.8.13 | \n",
- "
\n",
- " \n",
- " Ray version: | \n",
- " 2.1.0 | \n",
- "
\n",
- " \n",
- " Dashboard: | \n",
- " http://10.254.20.41:8265 | \n",
- "
\n",
- "\n",
- "
\n",
- "
\n",
- "
\n"
- ],
- "text/plain": [
- "ClientContext(dashboard_url='10.254.20.41:8265', python_version='3.8.13', ray_version='2.1.0', ray_commit='23f34d948dae8de9b168667ab27e6cf940b3ae85', protocol_version='2022-10-05', _num_clients=1, _context_to_restore=)"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import ray\n",
"\n",
@@ -208,7 +129,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "3436eb4a-217c-4109-a3c3-309fda7e2442",
"metadata": {},
"outputs": [],
@@ -232,72 +153,32 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "5cca1874-2be3-4631-ae48-9adfa45e3af3",
"metadata": {
- "scrolled": true,
"tags": []
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2023-06-27 19:14:28,222\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000002000000\n",
- "2023-06-27 19:14:28,222\tDEBUG worker.py:564 -- Scheduling task heavy_calculation 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x02\\x00\\x00\\x00'\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"ref = heavy_calculation.remote(3000)"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "01172c29-e8bf-41ef-8db5-eccb07906111",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2023-06-27 19:14:29,202\tDEBUG worker.py:640 -- Retaining 16310a0f0a45af5cffffffffffffffffffffffff0100000001000000\n",
- "2023-06-27 19:14:31,224\tDEBUG worker.py:439 -- Internal retry for get [ClientObjectRef(16310a0f0a45af5cffffffffffffffffffffffff0100000001000000)]\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "1789.4644387076714"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"ray.get(ref)"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "9e79b547-a457-4232-b77d-19147067b972",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2023-06-27 19:14:33,161\tDEBUG dataclient.py:287 -- Got unawaited response connection_cleanup {\n",
- "}\n",
- "\n",
- "2023-06-27 19:14:34,460\tDEBUG dataclient.py:278 -- Shutting down data channel.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"ray.cancel(ref)\n",
"ray.shutdown()"
@@ -305,7 +186,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "2c198f1f-68bf-43ff-a148-02b5cb000ff2",
"metadata": {},
"outputs": [],
diff --git a/demo-notebooks/additional-demos/ray_job_client.ipynb b/demo-notebooks/additional-demos/ray_job_client.ipynb
index 828b34696..2f43306e9 100644
--- a/demo-notebooks/additional-demos/ray_job_client.ipynb
+++ b/demo-notebooks/additional-demos/ray_job_client.ipynb
@@ -36,15 +36,15 @@
]
},
{
- "cell_type": "markdown",
- "id": "18de2d65",
- "metadata": {},
- "source": [
- "\n",
- "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n",
- "If you have your own Ray image which suits your purposes, specify it in image field to override the default image."
- ]
- },
+ "cell_type": "markdown",
+ "id": "18de2d65",
+ "metadata": {},
+ "source": [
+ "\n",
+ "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n",
+ "If you have your own Ray image which suits your purposes, specify it in image field to override the default image."
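+ "If you have your own Ray image which suits your purposes, specify it in the image field to override the default image."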
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/demo-notebooks/guided-demos/1_cluster_job_client.ipynb b/demo-notebooks/guided-demos/1_cluster_job_client.ipynb
index eb37f70b8..56585e373 100644
--- a/demo-notebooks/guided-demos/1_cluster_job_client.ipynb
+++ b/demo-notebooks/guided-demos/1_cluster_job_client.ipynb
@@ -35,16 +35,16 @@
]
},
{
- "cell_type": "markdown",
- "id": "bc27f84c",
- "metadata": {},
- "source": [
- "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n",
- "\n",
- "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n",
- "If you have your own Ray image which suits your purposes, specify it in image field to override the default image."
- ]
- },
+ "cell_type": "markdown",
+ "id": "bc27f84c",
+ "metadata": {},
+ "source": [
+ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n",
+ "\n",
+ "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n",
+ "If you have your own Ray image which suits your purposes, specify it in the image field to override the default image."
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb
index dc2073760..03270e8b7 100644
--- a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb
+++ b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb
@@ -13,7 +13,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a",
"metadata": {},
"outputs": [],
@@ -53,18 +53,10 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "0f4bc870-091f-4e11-9642-cba145710159",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Written to: raytest.yaml\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Create and configure our cluster object\n",
"# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n",
@@ -93,7 +85,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200",
"metadata": {},
"outputs": [],
@@ -112,125 +104,30 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "3c1b4311-2e61-44c9-8225-87c2db11363d",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "╭───────────────────────╮\n",
- "│ 🚀 Cluster Queue │\n",
- "│ Status 🚀 │\n",
- "│ +---------+---------+ │\n",
- "│ | Name | Status | │\n",
- "│ +=========+=========+ │\n",
- "│ | raytest | pending | │\n",
- "│ | | | │\n",
- "│ +---------+---------+ │\n",
- "╰───────────────────────╯\n",
- "
\n"
- ],
- "text/plain": [
- "╭───────────────────────╮\n",
- "│ \u001b[3m \u001b[0m\u001b[1;3m 🚀 Cluster Queue\u001b[0m\u001b[3m \u001b[0m │\n",
- "│ \u001b[3m \u001b[0m\u001b[1;3mStatus 🚀\u001b[0m\u001b[3m \u001b[0m │\n",
- "│ +---------+---------+ │\n",
- "│ |\u001b[1m \u001b[0m\u001b[1mName \u001b[0m\u001b[1m \u001b[0m|\u001b[1m \u001b[0m\u001b[1mStatus \u001b[0m\u001b[1m \u001b[0m| │\n",
- "│ +=========+=========+ │\n",
- "│ |\u001b[36m \u001b[0m\u001b[36mraytest\u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35mpending\u001b[0m\u001b[35m \u001b[0m| │\n",
- "│ |\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m| │\n",
- "│ +---------+---------+ │\n",
- "╰───────────────────────╯\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "(, False)"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"cluster.status()"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "a99d5aff",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Waiting for requested resources to be set up...\n",
- "Requested cluster up and running!\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"cluster.wait_ready()"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "df71c1ed",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " 🚀 CodeFlare Cluster Status 🚀 \n",
- " \n",
- " ╭──────────────────────────────────────────────────────────────╮ \n",
- " │ Name │ \n",
- " │ raytest Active ✅ │ \n",
- " │ │ \n",
- " │ URI: ray://raytest-head-svc.default.svc:10001 │ \n",
- " │ │ \n",
- " │ Dashboard🔗 │ \n",
- " │ │ \n",
- " ╰──────────────────────────────────────────────────────────────╯ \n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Status 🚀\u001b[0m\u001b[3m \u001b[0m\n",
- "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n",
- " ╭──────────────────────────────────────────────────────────────╮ \n",
- " │ \u001b[1;37;42mName\u001b[0m │ \n",
- " │ \u001b[1;4mraytest\u001b[0m Active ✅ │ \n",
- " │ │ \n",
- " │ \u001b[1mURI:\u001b[0m ray://raytest-head-svc.default.svc:10001 │ \n",
- " │ │ \n",
- " │ \u001b]8;id=630217;ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n",
- " │ │ \n",
- " ╰──────────────────────────────────────────────────────────────╯ \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "(, True)"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"cluster.status()"
]
@@ -245,68 +142,10 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " 🚀 CodeFlare Cluster Details 🚀 \n",
- " \n",
- " ╭───────────────────────────────────────────────────────────────╮ \n",
- " │ Name │ \n",
- " │ raytest Active ✅ │ \n",
- " │ │ \n",
- " │ URI: ray://raytest-head-svc.default.svc:10001 │ \n",
- " │ │ \n",
- " │ Dashboard🔗 │ \n",
- " │ │ \n",
- " │ Cluster Resources │ \n",
- " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
- " │ │ # Workers │ │ Memory CPU GPU │ │ \n",
- " │ │ │ │ │ │ \n",
- " │ │ 2 │ │ 4~4 1 0 │ │ \n",
- " │ │ │ │ │ │ \n",
- " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n",
- " ╰───────────────────────────────────────────────────────────────╯ \n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n",
- "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n",
- " ╭───────────────────────────────────────────────────────────────╮ \n",
- " │ \u001b[1;37;42mName\u001b[0m │ \n",
- " │ \u001b[1;4mraytest\u001b[0m Active ✅ │ \n",
- " │ │ \n",
- " │ \u001b[1mURI:\u001b[0m ray://raytest-head-svc.default.svc:10001 │ \n",
- " │ │ \n",
- " │ \u001b]8;id=623965;http://ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n",
- " │ │ \n",
- " │ \u001b[3m Cluster Resources \u001b[0m │ \n",
- " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
- " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n",
- " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m4~4 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m0 \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n",
- " ╰───────────────────────────────────────────────────────────────╯ \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "RayCluster(name='raytest', status=, workers=2, worker_mem_min=4, worker_mem_max=4, worker_cpu=1, worker_gpu=0, namespace='default', dashboard='http://ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"cluster.details()"
]
@@ -321,7 +160,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57",
"metadata": {},
"outputs": [],
diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb
index 903be9f00..1a5e77f4e 100644
--- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb
+++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb
@@ -35,16 +35,16 @@
]
},
{
- "cell_type": "markdown",
- "id": "bc27f84c",
- "metadata": {},
- "source": [
- "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n",
- "\n",
- "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n",
- "If you have your own Ray image which suits your purposes, specify it in image field to override the default image."
- ]
- },
+ "cell_type": "markdown",
+ "id": "bc27f84c",
+ "metadata": {},
+ "source": [
+ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n",
+ "\n",
+ "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n",
+ "If you have your own Ray image which suits your purposes, specify it in image field to override the default image."
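+ "If you have your own Ray image which suits your purposes, specify it in the image field to override the default image."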
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb
index eb6c8977b..08eaf0b81 100644
--- a/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb
+++ b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb
@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a",
"metadata": {},
"outputs": [],
@@ -50,18 +50,10 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "0f4bc870-091f-4e11-9642-cba145710159",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Written to: interactivetest.yaml\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Create and configure our cluster object\n",
"# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n",
@@ -83,19 +75,10 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Waiting for requested resources to be set up...\n",
- "Requested cluster up and running!\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Bring up the cluster\n",
"cluster.up()\n",
@@ -104,68 +87,10 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "df71c1ed",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " 🚀 CodeFlare Cluster Details 🚀 \n",
- " \n",
- " ╭──────────────────────────────────────────────────────────────────────╮ \n",
- " │ Name │ \n",
- " │ interactivetest Active ✅ │ \n",
- " │ │ \n",
- " │ URI: ray://interactivetest-head-svc.default.svc:10001 │ \n",
- " │ │ \n",
- " │ Dashboard🔗 │ \n",
- " │ │ \n",
- " │ Cluster Resources │ \n",
- " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
- " │ │ # Workers │ │ Memory CPU GPU │ │ \n",
- " │ │ │ │ │ │ \n",
- " │ │ 2 │ │ 8~8 2 1 │ │ \n",
- " │ │ │ │ │ │ \n",
- " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n",
- " ╰──────────────────────────────────────────────────────────────────────╯ \n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n",
- "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n",
- " ╭──────────────────────────────────────────────────────────────────────╮ \n",
- " │ \u001b[1;37;42mName\u001b[0m │ \n",
- " │ \u001b[1;4minteractivetest\u001b[0m Active ✅ │ \n",
- " │ │ \n",
- " │ \u001b[1mURI:\u001b[0m ray://interactivetest-head-svc.default.svc:10001 │ \n",
- " │ │ \n",
- " │ \u001b]8;id=970589;http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n",
- " │ │ \n",
- " │ \u001b[3m Cluster Resources \u001b[0m │ \n",
- " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
- " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n",
- " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
- " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n",
- " ╰──────────────────────────────────────────────────────────────────────╯ \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "RayCluster(name='interactivetest', status=, workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"cluster.details()"
]
@@ -182,19 +107,10 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "c1719bca",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\n",
- "ray://interactivetest-head-svc.default.svc:10001\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"ray_dashboard_uri = cluster.cluster_dashboard_uri()\n",
"ray_cluster_uri = cluster.cluster_uri()\n",
@@ -225,18 +141,10 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "300146dc",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Ray cluster is up and running: True\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"#before proceeding make sure the cluster exists and the uri is not empty\n",
"assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n",
@@ -266,7 +174,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "1b36e0d9",
"metadata": {},
"outputs": [],
@@ -362,1111 +270,10 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "5901d958",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Downloading builder script: 100%|██████████| 4.31k/4.31k [00:00<00:00, 20.9MB/s]\n",
- "Downloading metadata: 100%|██████████| 2.17k/2.17k [00:00<00:00, 14.1MB/s]\n",
- "Downloading readme: 100%|██████████| 7.59k/7.59k [00:00<00:00, 22.9MB/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Downloading and preparing dataset imdb/plain_text to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Downloading data: 0%| | 0.00/84.1M [00:00, ?B/s]\n",
- "Downloading data: 1%| | 738k/84.1M [00:00<00:11, 7.34MB/s]\n",
- "Downloading data: 6%|▌ | 4.88M/84.1M [00:00<00:02, 27.1MB/s]\n",
- "Downloading data: 14%|█▍ | 11.8M/84.1M [00:00<00:01, 46.2MB/s]\n",
- "Downloading data: 23%|██▎ | 19.5M/84.1M [00:00<00:01, 58.3MB/s]\n",
- "Downloading data: 32%|███▏ | 27.3M/84.1M [00:00<00:00, 65.5MB/s]\n",
- "Downloading data: 42%|████▏ | 35.2M/84.1M [00:00<00:00, 70.0MB/s]\n",
- "Downloading data: 51%|█████ | 43.1M/84.1M [00:00<00:00, 72.9MB/s]\n",
- "Downloading data: 61%|██████ | 51.0M/84.1M [00:00<00:00, 74.8MB/s]\n",
- "Downloading data: 70%|███████ | 59.0M/84.1M [00:00<00:00, 76.5MB/s]\n",
- "Downloading data: 80%|███████▉ | 66.9M/84.1M [00:01<00:00, 77.4MB/s]\n",
- "Downloading data: 89%|████████▉ | 75.0M/84.1M [00:01<00:00, 78.2MB/s]\n",
- "Downloading data: 100%|██████████| 84.1M/84.1M [00:01<00:00, 69.2MB/s]\n",
- "Generating train split: 0%| | 0/25000 [00:00, ? examples/s]\n",
- "Generating train split: 0%| | 1/25000 [00:01<11:31:29, 1.66s/ examples]\n",
- "Generating train split: 4%|▍ | 1022/25000 [00:01<00:29, 809.65 examples/s]\n",
- "Generating train split: 9%|▊ | 2128/25000 [00:01<00:12, 1851.90 examples/s]\n",
- "Generating train split: 13%|█▎ | 3218/25000 [00:01<00:07, 2999.09 examples/s]\n",
- "Generating train split: 17%|█▋ | 4319/25000 [00:02<00:04, 4225.90 examples/s]\n",
- "Generating train split: 22%|██▏ | 5390/25000 [00:02<00:03, 5389.63 examples/s]\n",
- "Generating train split: 26%|██▌ | 6488/25000 [00:02<00:02, 6530.17 examples/s]\n",
- "Generating train split: 30%|███ | 7594/25000 [00:02<00:02, 7547.93 examples/s]\n",
- "Generating train split: 37%|███▋ | 9185/25000 [00:02<00:01, 8542.08 examples/s]\n",
- "Generating train split: 41%|████ | 10289/25000 [00:02<00:01, 9132.68 examples/s]\n",
- "Generating train split: 46%|████▌ | 11395/25000 [00:02<00:01, 9616.88 examples/s]\n",
- "Generating train split: 50%|████▉ | 12495/25000 [00:02<00:01, 9982.11 examples/s]\n",
- "Generating train split: 54%|█████▍ | 13586/25000 [00:02<00:01, 10233.82 examples/s]\n",
- "Generating train split: 61%|██████ | 15198/25000 [00:03<00:00, 10420.44 examples/s]\n",
- "Generating train split: 65%|██████▌ | 16284/25000 [00:03<00:00, 10532.91 examples/s]\n",
- "Generating train split: 70%|██████▉ | 17379/25000 [00:03<00:00, 10644.61 examples/s]\n",
- "Generating train split: 74%|███████▍ | 18474/25000 [00:03<00:00, 10729.38 examples/s]\n",
- "Generating train split: 78%|███████▊ | 19575/25000 [00:03<00:00, 10806.50 examples/s]\n",
- "Generating train split: 85%|████████▍ | 21189/25000 [00:03<00:00, 10785.22 examples/s]\n",
- "Generating train split: 91%|█████████▏| 22842/25000 [00:03<00:00, 10865.13 examples/s]\n",
- "Generating train split: 98%|█████████▊| 24445/25000 [00:03<00:00, 10803.63 examples/s]\n",
- "Generating test split: 0%| | 0/25000 [00:00, ? examples/s] \n",
- "Generating test split: 0%| | 1/25000 [00:00<2:54:01, 2.39 examples/s]\n",
- "Generating test split: 4%|▍ | 1074/25000 [00:00<00:08, 2734.76 examples/s]\n",
- "Generating test split: 9%|▊ | 2140/25000 [00:00<00:04, 4847.64 examples/s]\n",
- "Generating test split: 12%|█▏ | 3049/25000 [00:00<00:03, 6016.37 examples/s]\n",
- "Generating test split: 17%|█▋ | 4150/25000 [00:00<00:02, 7426.18 examples/s]\n",
- "Generating test split: 21%|██ | 5250/25000 [00:00<00:02, 8451.30 examples/s]\n",
- "Generating test split: 25%|██▌ | 6342/25000 [00:01<00:02, 9165.11 examples/s]\n",
- "Generating test split: 30%|██▉ | 7458/25000 [00:01<00:01, 9745.16 examples/s]\n",
- "Generating test split: 34%|███▍ | 8564/25000 [00:01<00:01, 10130.34 examples/s]\n",
- "Generating test split: 39%|███▊ | 9646/25000 [00:01<00:01, 10325.20 examples/s]\n",
- "Generating test split: 43%|████▎ | 10729/25000 [00:01<00:01, 10472.33 examples/s]\n",
- "Generating test split: 47%|████▋ | 11834/25000 [00:01<00:01, 10642.94 examples/s]\n",
- "Generating test split: 54%|█████▍ | 13453/25000 [00:01<00:01, 10700.70 examples/s]\n",
- "Generating test split: 58%|█████▊ | 14575/25000 [00:01<00:00, 10805.59 examples/s]\n",
- "Generating test split: 63%|██████▎ | 15687/25000 [00:01<00:00, 10885.11 examples/s]\n",
- "Generating test split: 67%|██████▋ | 16800/25000 [00:01<00:00, 10952.30 examples/s]\n",
- "Generating test split: 74%|███████▎ | 18420/25000 [00:02<00:00, 10891.20 examples/s]\n",
- "Generating test split: 78%|███████▊ | 19528/25000 [00:02<00:00, 10938.21 examples/s]\n",
- "Generating test split: 83%|████████▎ | 20658/25000 [00:02<00:00, 11037.04 examples/s]\n",
- "Generating test split: 89%|████████▉ | 22286/25000 [00:02<00:00, 10960.86 examples/s]\n",
- "Generating test split: 96%|█████████▌| 23938/25000 [00:02<00:00, 10976.44 examples/s]\n",
- "Generating unsupervised split: 0%| | 0/50000 [00:00, ? examples/s] \n",
- "Generating unsupervised split: 0%| | 1/50000 [00:02<40:58:55, 2.95s/ examples]\n",
- "Generating unsupervised split: 2%|▏ | 1075/50000 [00:03<01:38, 496.25 examples/s]\n",
- "Generating unsupervised split: 4%|▍ | 2129/50000 [00:03<00:42, 1117.77 examples/s]\n",
- "Generating unsupervised split: 6%|▋ | 3230/50000 [00:03<00:24, 1921.11 examples/s]\n",
- "Generating unsupervised split: 9%|▊ | 4338/50000 [00:03<00:15, 2873.07 examples/s]\n",
- "Generating unsupervised split: 11%|█ | 5442/50000 [00:03<00:11, 3928.58 examples/s]\n",
- "Generating unsupervised split: 13%|█▎ | 6559/50000 [00:03<00:08, 5047.37 examples/s]\n",
- "Generating unsupervised split: 15%|█▌ | 7656/50000 [00:03<00:06, 6121.59 examples/s]\n",
- "Generating unsupervised split: 18%|█▊ | 8767/50000 [00:03<00:05, 7149.80 examples/s]\n",
- "Generating unsupervised split: 20%|█▉ | 9850/50000 [00:03<00:05, 7985.57 examples/s]\n",
- "Generating unsupervised split: 22%|██▏ | 10943/50000 [00:03<00:04, 8705.92 examples/s]\n",
- "Generating unsupervised split: 25%|██▌ | 12544/50000 [00:04<00:03, 9378.86 examples/s]\n",
- "Generating unsupervised split: 27%|██▋ | 13652/50000 [00:04<00:03, 9797.34 examples/s]\n",
- "Generating unsupervised split: 30%|██▉ | 14763/50000 [00:04<00:03, 10138.25 examples/s]\n",
- "Generating unsupervised split: 32%|███▏ | 15873/50000 [00:04<00:03, 10398.21 examples/s]\n",
- "Generating unsupervised split: 35%|███▌ | 17506/50000 [00:04<00:03, 10574.88 examples/s]\n",
- "Generating unsupervised split: 37%|███▋ | 18617/50000 [00:04<00:02, 10713.10 examples/s]\n",
- "Generating unsupervised split: 40%|███▉ | 19801/50000 [00:04<00:03, 8478.31 examples/s] \n",
- "Generating unsupervised split: 42%|████▏ | 20891/50000 [00:04<00:03, 9032.00 examples/s]\n",
- "Generating unsupervised split: 44%|████▍ | 21968/50000 [00:05<00:02, 9458.88 examples/s]\n",
- "Generating unsupervised split: 46%|████▌ | 23006/50000 [00:05<00:02, 9697.89 examples/s]\n",
- "Generating unsupervised split: 48%|████▊ | 24105/50000 [00:05<00:02, 10044.77 examples/s]\n",
- "Generating unsupervised split: 50%|█████ | 25199/50000 [00:05<00:02, 10289.30 examples/s]\n",
- "Generating unsupervised split: 53%|█████▎ | 26282/50000 [00:05<00:02, 10442.13 examples/s]\n",
- "Generating unsupervised split: 55%|█████▍ | 27384/50000 [00:05<00:02, 10605.77 examples/s]\n",
- "Generating unsupervised split: 57%|█████▋ | 28493/50000 [00:05<00:02, 10744.66 examples/s]\n",
- "Generating unsupervised split: 59%|█████▉ | 29591/50000 [00:05<00:01, 10809.48 examples/s]\n",
- "Generating unsupervised split: 61%|██████▏ | 30684/50000 [00:05<00:01, 10841.97 examples/s]\n",
- "Generating unsupervised split: 65%|██████▍ | 32251/50000 [00:06<00:01, 10685.34 examples/s]\n",
- "Generating unsupervised split: 68%|██████▊ | 33861/50000 [00:06<00:01, 10698.40 examples/s]\n",
- "Generating unsupervised split: 70%|██████▉ | 34952/50000 [00:06<00:01, 10750.14 examples/s]\n",
- "Generating unsupervised split: 73%|███████▎ | 36600/50000 [00:06<00:01, 10829.69 examples/s]\n",
- "Generating unsupervised split: 75%|███████▌ | 37713/50000 [00:06<00:01, 10903.43 examples/s]\n",
- "Generating unsupervised split: 79%|███████▊ | 39317/50000 [00:06<00:00, 10828.58 examples/s]\n",
- "Generating unsupervised split: 81%|████████ | 40422/50000 [00:06<00:00, 10882.20 examples/s]\n",
- "Generating unsupervised split: 83%|████████▎ | 41520/50000 [00:06<00:00, 10907.05 examples/s]\n",
- "Generating unsupervised split: 85%|████████▌ | 42624/50000 [00:06<00:00, 10940.31 examples/s]\n",
- "Generating unsupervised split: 87%|████████▋ | 43726/50000 [00:07<00:00, 10961.40 examples/s]\n",
- "Generating unsupervised split: 91%|█████████ | 45333/50000 [00:07<00:00, 10862.47 examples/s]\n",
- "Generating unsupervised split: 93%|█████████▎| 46439/50000 [00:07<00:00, 10913.03 examples/s]\n",
- "Generating unsupervised split: 95%|█████████▌| 47557/50000 [00:07<00:00, 10904.55 examples/s]\n",
- "Generating unsupervised split: 97%|█████████▋| 48664/50000 [00:07<00:00, 10941.00 examples/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Dataset imdb downloaded and prepared to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0. Subsequent calls will reuse this data.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 3/3 [00:00<00:00, 599.79it/s] \n",
- "Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 8.59kB/s]\n",
- "Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 163kB/s]\n",
- "Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 3.72MB/s]\n",
- "Downloading (…)/main/tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 44.8MB/s]\n",
- "Map: 0%| | 0/25000 [00:00, ? examples/s]\n",
- "Map: 4%|▍ | 1000/25000 [00:00<00:13, 1733.22 examples/s]\n",
- "Map: 8%|▊ | 2000/25000 [00:01<00:12, 1866.13 examples/s]\n",
- "Map: 12%|█▏ | 3000/25000 [00:01<00:11, 1887.41 examples/s]\n",
- "Map: 16%|█▌ | 4000/25000 [00:02<00:11, 1898.51 examples/s]\n",
- "Map: 20%|██ | 5000/25000 [00:02<00:10, 1828.14 examples/s]\n",
- "Map: 24%|██▍ | 6000/25000 [00:03<00:10, 1841.43 examples/s]\n",
- "Map: 28%|██▊ | 7000/25000 [00:03<00:09, 1849.60 examples/s]\n",
- "Map: 32%|███▏ | 8000/25000 [00:04<00:09, 1817.98 examples/s]\n",
- "Map: 36%|███▌ | 9000/25000 [00:04<00:08, 1815.54 examples/s]\n",
- "Map: 40%|████ | 10000/25000 [00:05<00:08, 1814.58 examples/s]\n",
- "Map: 44%|████▍ | 11000/25000 [00:05<00:07, 1834.65 examples/s]\n",
- "Map: 48%|████▊ | 12000/25000 [00:06<00:06, 1864.77 examples/s]\n",
- "Map: 52%|█████▏ | 13000/25000 [00:07<00:06, 1877.35 examples/s]\n",
- "Map: 56%|█████▌ | 14000/25000 [00:07<00:05, 1835.22 examples/s]\n",
- "Map: 60%|██████ | 15000/25000 [00:08<00:05, 1838.61 examples/s]\n",
- "Map: 64%|██████▍ | 16000/25000 [00:08<00:04, 1833.90 examples/s]\n",
- "Map: 68%|██████▊ | 17000/25000 [00:09<00:04, 1749.31 examples/s]\n",
- "Map: 72%|███████▏ | 18000/25000 [00:09<00:03, 1787.07 examples/s]\n",
- "Map: 76%|███████▌ | 19000/25000 [00:10<00:03, 1803.74 examples/s]\n",
- "Map: 80%|████████ | 20000/25000 [00:10<00:02, 1794.89 examples/s]\n",
- "Map: 84%|████████▍ | 21000/25000 [00:11<00:02, 1692.55 examples/s]\n",
- "Map: 88%|████████▊ | 22000/25000 [00:12<00:01, 1723.77 examples/s]\n",
- "Map: 92%|█████████▏| 23000/25000 [00:12<00:01, 1761.44 examples/s]\n",
- "Map: 96%|█████████▌| 24000/25000 [00:13<00:00, 1781.81 examples/s]\n",
- "Map: 0%| | 0/25000 [00:00, ? examples/s] \n",
- "Map: 4%|▍ | 1000/25000 [00:00<00:12, 1926.28 examples/s]\n",
- "Map: 8%|▊ | 2000/25000 [00:01<00:11, 1948.38 examples/s]\n",
- "Map: 12%|█▏ | 3000/25000 [00:01<00:11, 1917.45 examples/s]\n",
- "Map: 16%|█▌ | 4000/25000 [00:02<00:11, 1904.55 examples/s]\n",
- "Map: 20%|██ | 5000/25000 [00:02<00:10, 1871.95 examples/s]\n",
- "Map: 24%|██▍ | 6000/25000 [00:03<00:10, 1852.86 examples/s]\n",
- "Map: 28%|██▊ | 7000/25000 [00:03<00:09, 1872.66 examples/s]\n",
- "Map: 32%|███▏ | 8000/25000 [00:04<00:09, 1844.53 examples/s]\n",
- "Map: 36%|███▌ | 9000/25000 [00:04<00:08, 1834.37 examples/s]\n",
- "Map: 40%|████ | 10000/25000 [00:05<00:08, 1806.45 examples/s]\n",
- "Map: 44%|████▍ | 11000/25000 [00:05<00:07, 1842.15 examples/s]\n",
- "Map: 48%|████▊ | 12000/25000 [00:06<00:07, 1857.11 examples/s]\n",
- "Map: 52%|█████▏ | 13000/25000 [00:07<00:06, 1807.03 examples/s]\n",
- "Map: 56%|█████▌ | 14000/25000 [00:07<00:05, 1845.42 examples/s]\n",
- "Map: 60%|██████ | 15000/25000 [00:08<00:05, 1857.36 examples/s]\n",
- "Map: 64%|██████▍ | 16000/25000 [00:08<00:04, 1833.66 examples/s]\n",
- "Map: 68%|██████▊ | 17000/25000 [00:09<00:04, 1846.19 examples/s]\n",
- "Map: 72%|███████▏ | 18000/25000 [00:09<00:03, 1823.49 examples/s]\n",
- "Map: 76%|███████▌ | 19000/25000 [00:10<00:03, 1838.20 examples/s]\n",
- "Map: 80%|████████ | 20000/25000 [00:10<00:02, 1746.57 examples/s]\n",
- "Map: 84%|████████▍ | 21000/25000 [00:11<00:02, 1813.89 examples/s]\n",
- "Map: 88%|████████▊ | 22000/25000 [00:12<00:01, 1750.50 examples/s]\n",
- "Map: 92%|█████████▏| 23000/25000 [00:12<00:01, 1781.89 examples/s]\n",
- "Map: 96%|█████████▌| 24000/25000 [00:13<00:00, 1827.58 examples/s]\n",
- "Map: 100%|██████████| 25000/25000 [00:13<00:00, 1814.89 examples/s]\n",
- "Map: 0%| | 0/50000 [00:00, ? examples/s] \n",
- "Map: 2%|▏ | 1000/50000 [00:00<00:25, 1952.32 examples/s]\n",
- "Map: 4%|▍ | 2000/50000 [00:01<00:25, 1891.82 examples/s]\n",
- "Map: 6%|▌ | 3000/50000 [00:01<00:25, 1832.09 examples/s]\n",
- "Map: 8%|▊ | 4000/50000 [00:02<00:25, 1835.46 examples/s]\n",
- "Map: 10%|█ | 5000/50000 [00:02<00:24, 1815.46 examples/s]\n",
- "Map: 12%|█▏ | 6000/50000 [00:03<00:24, 1792.69 examples/s]\n",
- "Map: 14%|█▍ | 7000/50000 [00:03<00:24, 1790.63 examples/s]\n",
- "Map: 16%|█▌ | 8000/50000 [00:04<00:23, 1793.39 examples/s]\n",
- "Map: 18%|█▊ | 9000/50000 [00:04<00:22, 1823.55 examples/s]\n",
- "Map: 20%|██ | 10000/50000 [00:05<00:22, 1802.53 examples/s]\n",
- "Map: 22%|██▏ | 11000/50000 [00:06<00:21, 1776.32 examples/s]\n",
- "Map: 24%|██▍ | 12000/50000 [00:06<00:21, 1806.96 examples/s]\n",
- "Map: 26%|██▌ | 13000/50000 [00:07<00:20, 1827.85 examples/s]\n",
- "Map: 28%|██▊ | 14000/50000 [00:07<00:19, 1831.94 examples/s]\n",
- "Map: 30%|███ | 15000/50000 [00:08<00:19, 1803.46 examples/s]\n",
- "Map: 32%|███▏ | 16000/50000 [00:08<00:18, 1802.66 examples/s]\n",
- "Map: 34%|███▍ | 17000/50000 [00:09<00:18, 1809.01 examples/s]\n",
- "Map: 36%|███▌ | 18000/50000 [00:09<00:17, 1818.25 examples/s]\n",
- "Map: 38%|███▊ | 19000/50000 [00:10<00:18, 1720.56 examples/s]\n",
- "Map: 40%|████ | 20000/50000 [00:11<00:17, 1692.87 examples/s]\n",
- "Map: 42%|████▏ | 21000/50000 [00:11<00:16, 1739.68 examples/s]\n",
- "Map: 44%|████▍ | 22000/50000 [00:12<00:15, 1763.26 examples/s]\n",
- "Map: 46%|████▌ | 23000/50000 [00:12<00:15, 1789.31 examples/s]\n",
- "Map: 48%|████▊ | 24000/50000 [00:13<00:14, 1795.21 examples/s]\n",
- "Map: 50%|█████ | 25000/50000 [00:13<00:13, 1812.24 examples/s]\n",
- "Map: 52%|█████▏ | 26000/50000 [00:14<00:13, 1838.29 examples/s]\n",
- "Map: 54%|█████▍ | 27000/50000 [00:14<00:12, 1821.26 examples/s]\n",
- "Map: 56%|█████▌ | 28000/50000 [00:15<00:11, 1870.39 examples/s]\n",
- "Map: 58%|█████▊ | 29000/50000 [00:16<00:11, 1795.22 examples/s]\n",
- "Map: 60%|██████ | 30000/50000 [00:16<00:10, 1820.86 examples/s]\n",
- "Map: 62%|██████▏ | 31000/50000 [00:17<00:10, 1795.73 examples/s]\n",
- "Map: 64%|██████▍ | 32000/50000 [00:17<00:09, 1816.44 examples/s]\n",
- "Map: 66%|██████▌ | 33000/50000 [00:18<00:09, 1785.24 examples/s]\n",
- "Map: 68%|██████▊ | 34000/50000 [00:18<00:08, 1832.05 examples/s]\n",
- "Map: 70%|███████ | 35000/50000 [00:19<00:08, 1871.96 examples/s]\n",
- "Map: 72%|███████▏ | 36000/50000 [00:19<00:07, 1871.37 examples/s]\n",
- "Map: 74%|███████▍ | 37000/50000 [00:20<00:07, 1848.49 examples/s]\n",
- "Map: 76%|███████▌ | 38000/50000 [00:20<00:06, 1867.00 examples/s]\n",
- "Map: 78%|███████▊ | 39000/50000 [00:21<00:06, 1812.74 examples/s]\n",
- "Map: 80%|████████ | 40000/50000 [00:22<00:05, 1841.77 examples/s]\n",
- "Map: 82%|████████▏ | 41000/50000 [00:22<00:04, 1815.79 examples/s]\n",
- "Map: 84%|████████▍ | 42000/50000 [00:23<00:04, 1826.65 examples/s]\n",
- "Map: 86%|████████▌ | 43000/50000 [00:23<00:04, 1698.92 examples/s]\n",
- "Map: 88%|████████▊ | 44000/50000 [00:24<00:03, 1745.71 examples/s]\n",
- "Map: 90%|█████████ | 45000/50000 [00:24<00:02, 1773.01 examples/s]\n",
- "Map: 92%|█████████▏| 46000/50000 [00:25<00:02, 1779.97 examples/s]\n",
- "Map: 94%|█████████▍| 47000/50000 [00:26<00:01, 1779.81 examples/s]\n",
- "Map: 96%|█████████▌| 48000/50000 [00:26<00:01, 1777.28 examples/s]\n",
- "Map: 98%|█████████▊| 49000/50000 [00:27<00:00, 1773.03 examples/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m len of train Dataset({\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m features: ['text', 'label', 'input_ids', 'attention_mask'],\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m num_rows: 100\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m }) and test Dataset({\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m features: ['text', 'label', 'input_ids', 'attention_mask'],\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m num_rows: 100\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m })\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- " \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m 2023-08-09 14:51:50,865\tWARNING dataset.py:253 -- \u001b[33mImportant: Ray Data requires schemas for all datasets in Ray 2.5. This means that standalone Python objects are no longer supported. In addition, the default batch format is fixed to NumPy. To revert to legacy behavior temporarily, set the environment variable RAY_DATA_STRICT_MODE=0 on all cluster processes.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Learn more here: https://docs.ray.io/en/master/data/faq.html#migrating-to-strict-mode\u001b[0m\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m /tmp/ipykernel_265/307576807.py:57: DeprecationWarning: `HuggingFaceTrainer`, `HuggingFacePredictor` and `HuggingFaceCheckpoint` have been renamed to `TransformersTrainer`, `TransformersPredictor` and `TransformersCheckpoint` respectively. Update your code to use the new import paths. This will raise an exception in the future.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m To disable this warning, you can either:\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \t- Avoid using `tokenizers` before the fork if possible\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:51:51 (running for 00:00:00.12)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 0/6 CPUs, 0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 PENDING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | PENDING | |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:55,978\tWARNING dataset.py:253 -- \u001b[33mImportant: Ray Data requires schemas for all datasets in Ray 2.5. This means that standalone Python objects are no longer supported. In addition, the default batch format is fixed to NumPy. To revert to legacy behavior temporarily, set the environment variable RAY_DATA_STRICT_MODE=0 on all cluster processes.\n",
- "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m \n",
- "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m Learn more here: https://docs.ray.io/en/master/data/faq.html#migrating-to-strict-mode\u001b[0m\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:51:56 (running for 00:00:05.16)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:57,260\tINFO backend_executor.py:137 -- Starting distributed worker processes: ['235 (10.130.4.19)', '232 (10.129.4.19)']\n",
- "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,957\tINFO streaming_executor.py:91 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[RandomizeBlockOrder]\n",
- "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,957\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n",
- "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,958\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n",
- "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,969\tINFO streaming_executor.py:149 -- Shutting down .\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,912\tINFO config.py:86 -- Setting up process group for: env:// [rank=0, world_size=2]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:01 (running for 00:00:10.18)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:91 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[RandomizeBlockOrder]\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,274\tINFO streaming_executor.py:149 -- Shutting down .\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:91 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[RandomizeBlockOrder]\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,263\tINFO streaming_executor.py:149 -- Shutting down .\n",
- "Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 151kB/s]\n",
- "Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 146kB/s]\n",
- "Downloading model.safetensors: 0%| | 0.00/268M [00:00, ?B/s]\n",
- "Downloading model.safetensors: 0%| | 0.00/268M [00:00, ?B/s]\n",
- "Downloading model.safetensors: 8%|▊ | 21.0M/268M [00:00<00:01, 165MB/s]\n",
- "Downloading model.safetensors: 16%|█▌ | 41.9M/268M [00:00<00:00, 362MB/s]\n",
- "Downloading model.safetensors: 31%|███▏ | 83.9M/268M [00:00<00:00, 386MB/s]\n",
- "Downloading model.safetensors: 20%|█▉ | 52.4M/268M [00:00<00:01, 213MB/s]\n",
- "Downloading model.safetensors: 47%|████▋ | 126M/268M [00:00<00:00, 366MB/s] \n",
- "Downloading model.safetensors: 31%|███▏ | 83.9M/268M [00:00<00:00, 235MB/s]\n",
- "Downloading model.safetensors: 63%|██████▎ | 168M/268M [00:00<00:00, 339MB/s]\n",
- "Downloading model.safetensors: 43%|████▎ | 115M/268M [00:00<00:00, 246MB/s] \n",
- "Downloading model.safetensors: 78%|███████▊ | 210M/268M [00:00<00:00, 325MB/s]\n",
- "Downloading model.safetensors: 55%|█████▍ | 147M/268M [00:00<00:00, 234MB/s]\n",
- "Downloading model.safetensors: 100%|██████████| 268M/268M [00:00<00:00, 348MB/s]\n",
- "Downloading model.safetensors: 70%|███████ | 189M/268M [00:00<00:00, 265MB/s]\n",
- "Downloading model.safetensors: 86%|████████▌ | 231M/268M [00:00<00:00, 290MB/s]\n",
- "Downloading model.safetensors: 100%|██████████| 268M/268M [00:00<00:00, 270MB/s]\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'classifier.weight']\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'classifier.weight']\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m /tmp/ray/session_2023-08-09_14-46-04_353124_8/runtime_resources/pip/159f134c84a4ce99d7e5ba0207d38de3134de196/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m warnings.warn(\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m /tmp/ray/session_2023-08-09_14-46-04_353124_8/runtime_resources/pip/159f134c84a4ce99d7e5ba0207d38de3134de196/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m warnings.warn(\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m [W reducer.cpp:1300] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m [W reducer.cpp:1300] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:06 (running for 00:00:15.20)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:11 (running for 00:00:20.22)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:16 (running for 00:00:25.24)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:21 (running for 00:00:30.26)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:26 (running for 00:00:35.28)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:31 (running for 00:00:40.30)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:36 (running for 00:00:45.32)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:41 (running for 00:00:50.34)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:46 (running for 00:00:55.36)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:51 (running for 00:01:00.38)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:56 (running for 00:01:05.40)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:01 (running for 00:01:10.42)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:06 (running for 00:01:15.44)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:11 (running for 00:01:20.46)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:16 (running for 00:01:25.48)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:21 (running for 00:01:30.50)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:26 (running for 00:01:35.52)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:31 (running for 00:01:40.54)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:36 (running for 00:01:45.55)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:41 (running for 00:01:50.57)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:46 (running for 00:01:55.59)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:51 (running for 00:02:00.61)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:53:56 (running for 00:02:05.63)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:01 (running for 00:02:10.65)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:06 (running for 00:02:15.67)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:11 (running for 00:02:20.69)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:16 (running for 00:02:25.71)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:21 (running for 00:02:30.73)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:26 (running for 00:02:35.75)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:31 (running for 00:02:40.77)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:36 (running for 00:02:45.79)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:41 (running for 00:02:50.81)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:46 (running for 00:02:55.82)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:51 (running for 00:03:00.84)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:54:56 (running for 00:03:05.86)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:55:02 (running for 00:03:10.88)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:55:07 (running for 00:03:15.90)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:55:12 (running for 00:03:20.92)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:56:47 (running for 00:04:56.31)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:56:52 (running for 00:05:01.33)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:56:57 (running for 00:05:06.35)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:02 (running for 00:05:11.37)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:07 (running for 00:05:16.39)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:12 (running for 00:05:21.41)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:17 (running for 00:05:26.43)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:22 (running for 00:05:31.45)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:27 (running for 00:05:36.47)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:32 (running for 00:05:41.49)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:37 (running for 00:05:46.51)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m {'loss': 0.0128, 'learning_rate': 0.0, 'epoch': 1.0}\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m {'loss': 0.0128, 'learning_rate': 0.0, 'epoch': 1.0}\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m {'train_runtime': 335.4774, 'train_samples_per_second': 37.26, 'train_steps_per_second': 1.166, 'train_loss': 0.01283982952537439, 'epoch': 1.0}\n",
- "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m {'train_runtime': 334.617, 'train_samples_per_second': 37.356, 'train_steps_per_second': 1.169, 'train_loss': 0.01283982952537439, 'epoch': 1.0}\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result for HuggingFaceTrainer_f2621_00000:\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m date: 2023-08-09_14-57-39\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m done: false\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m epoch: 1.0\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m hostname: nteractivetest-worker-small-group-interactivetest-wz5wq\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m iterations_since_restore: 1\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m learning_rate: 0.0\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m loss: 0.0128\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m node_ip: 10.130.4.19\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m pid: 196\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m should_checkpoint: true\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m step: 391\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m time_since_restore: 344.01844906806946\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m time_this_iter_s: 344.01844906806946\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m time_total_s: 344.01844906806946\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m timestamp: 1691618259\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m train_loss: 0.01283982952537439\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m train_runtime: 335.4774\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m train_samples_per_second: 37.26\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m train_steps_per_second: 1.166\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m training_iteration: 1\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m trial_id: f2621_00000\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:48 (running for 00:05:57.07)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+--------+------------------+--------+-----------------+---------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc | iter | total time (s) | loss | learning_rate | epoch |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------+--------+------------------+--------+-----------------+---------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 | 1 | 344.018 | 0.0128 | 0 | 1 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+--------+------------------+--------+-----------------+---------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m 2023-08-09 14:57:48,184\tWARNING util.py:315 -- The `process_trial_save` operation took 8.180 s, which may be a performance bottleneck.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m 2023-08-09 14:57:48,185\tWARNING trial_runner.py:928 -- Consider turning off forced head-worker trial checkpoint syncs by setting sync_on_checkpoint=False. Note that this may result in faulty trial restoration if a failure occurs while the checkpoint is being synced from the worker to the head node.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Trial HuggingFaceTrainer_f2621_00000 completed.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:57:50 (running for 00:05:59.21)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 TERMINATED)\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+------------+-----------------+--------+------------------+--------+-----------------+---------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc | iter | total time (s) | loss | learning_rate | epoch |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+------------+-----------------+--------+------------------+--------+-----------------+---------|\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | TERMINATED | 10.130.4.19:196 | 1 | 344.018 | 0.0128 | 0 | 1 |\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+------------+-----------------+--------+------------------+--------+-----------------+---------+\n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n",
- "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"#call the above cell as a remote ray function\n",
"ray.get(train_fn.remote())"
@@ -1482,7 +289,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57",
"metadata": {},
"outputs": [],
diff --git a/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb
index bcf6e86c5..d0a527e30 100644
--- a/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb
+++ b/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb
@@ -35,16 +35,16 @@
]
},
{
- "cell_type": "markdown",
- "id": "bc27f84c",
- "metadata": {},
- "source": [
- "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n",
- "\n",
- "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n",
- "If you have your own Ray image which suits your purposes, specify it in image field to override the default image."
- ]
- },
+ "cell_type": "markdown",
+ "id": "bc27f84c",
+ "metadata": {},
+ "source": [
+ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n",
+ "\n",
+ "NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource. \n",
+ "If you have your own Ray image which suits your purposes, specify it in the image field to override the default image."
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,