From cf1d1f1152a5a150be16b3f102f98da1f577cfbb Mon Sep 17 00:00:00 2001
From: Robert Haase <haesleinhuepf@users.noreply.github.com>
Date: Thu, 25 Jul 2024 12:06:17 +0200
Subject: [PATCH] add Huggingface API demo

---
 docs/15_endpoint_apis/30_huggingface.ipynb | 52 +++++++++++++++++++---
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/docs/15_endpoint_apis/30_huggingface.ipynb b/docs/15_endpoint_apis/30_huggingface.ipynb
index 5b5e358..2ccda28 100644
--- a/docs/15_endpoint_apis/30_huggingface.ipynb
+++ b/docs/15_endpoint_apis/30_huggingface.ipynb
@@ -27,7 +27,7 @@
     "            \"text-generation\", model=model, model_kwargs={\"torch_dtype\": torch.bfloat16}, device_map=\"auto\"\n",
     "        )\n",
     "    \n",
-    "    return prompt_hf._pipeline(\"Hey how are you doing today?\")\n",
+    "    return prompt_hf._pipeline(request)[0]['generated_text']\n",
     "prompt_hf._pipeline = None"
    ]
   },
@@ -41,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "460e9067-63b9-4d11-97a0-2d97bbf5b529",
    "metadata": {},
    "outputs": [
@@ -56,7 +56,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "75430d1a95914bf9930bd8238055a367",
+       "model_id": "0c206a5866874d5c8a9112ab7ee9d193",
        "version_major": 2,
        "version_minor": 0
       },
@@ -71,11 +71,21 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Some parameters are on the meta device device because they were offloaded to the cpu and disk.\n",
+      "Some parameters are on the meta device device because they were offloaded to the disk and cpu.\n",
       "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n",
       "C:\\Users\\rober\\miniconda3\\envs\\genai-cpu\\Lib\\site-packages\\transformers\\generation\\utils.py:1259: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
       "  warnings.warn(\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'What is the capital of France? New York City\\nA. Paris\\nB. Philadelphia\\n'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -92,18 +102,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "fd82b669-d8b5-454d-82bf-86ff1fc65d71",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'What is the capital of the Czech Republic? Prague\\n...the Czech Republic? Prague\\n...'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prompt_hf(\"What is the capital of the Czech Republic?\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "efa3b8f8-bb32-4d05-bd6a-3c64c26fb91d",
+   "metadata": {},
+   "source": [
+    "## Exercise\n",
+    "\n",
+    "Explore the [Hugging Face Hub for more text-generation models](https://huggingface.co/models?pipeline_tag=text-generation&sort=trending). Download one and test it using the function above. Also read the model's documentation (its model card) and consider updating the function above according to the recommendations and examples given there."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "190b3090-e73c-48df-abf8-33b096f95a58",
+   "id": "bf77db27-bff6-4268-9bdd-8f9710fa5a9a",
    "metadata": {},
    "outputs": [],
    "source": []