diff --git a/Hugging Face/Hugging_Face_Token_Classification_for_Sequence_Labelling_Tasks_using_Inference_API.ipynb b/Hugging Face/Hugging_Face_Token_Classification_for_Sequence_Labelling_Tasks_using_Inference_API.ipynb new file mode 100644 index 0000000000..8648eab173 --- /dev/null +++ b/Hugging Face/Hugging_Face_Token_Classification_for_Sequence_Labelling_Tasks_using_Inference_API.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b58bfcc4-c6b6-4938-9a34-86f97c6a80a8", + "metadata": {}, + "source": [ + "\"Hugging" + ] + }, + { + "cell_type": "markdown", + "id": "db404ea5-a162-41ad-a89a-5b7d45856526", + "metadata": {}, + "source": [ + "# Hugging Face - Token Classification for Sequence Labelling Tasks using Inference API\n", + "

Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "1e8ef932-a062-4abb-b762-317e32648054", + "metadata": {}, + "source": [ + "**Tags:** #huggingface #ml #token_classification #prompt #inference_api #ai #text" + ] + }, + { + "cell_type": "markdown", + "id": "a99ea7c5-5a04-4340-b312-388fbe368ffd", + "metadata": {}, + "source": [ + "**Author:** [Kevin Joseph Scaria](https://www.linkedin.com/in/kevin-scaria/)" + ] + }, + { + "cell_type": "markdown", + "id": "3d629165-0640-46ac-aa14-2ce67eea8718", + "metadata": {}, + "source": [ + "**Last update:** 2023-11-17 (Created: 2023-11-17)" + ] + }, + { + "cell_type": "markdown", + "id": "2c5f0e91-b512-40a3-b417-9e28466ae410", + "metadata": {}, + "source": [ + "**Description:** This notebook demonstrates how to utilize the inference endpoints (additional information can be found here: link) of hugging face models for token classification based sequence labelling tasks." + ] + }, + { + "cell_type": "markdown", + "id": "d65c3967-2a28-4dc3-9158-fbfc4684dbaa", + "metadata": {}, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "ba7aa84f-f166-4b45-98c3-8e5fc9bbe144", + "metadata": {}, + "source": [ + "### Installations and Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80b10f53-fec0-472c-98b5-56b8722e5a80", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-18T04:25:00.763574Z", + "iopub.status.busy": "2023-11-18T04:25:00.763302Z", + "iopub.status.idle": "2023-11-18T04:25:10.601595Z", + "shell.execute_reply": "2023-11-18T04:25:10.600857Z", + "shell.execute_reply.started": "2023-11-18T04:25:00.763506Z" + } + }, + "outputs": [], + "source": [ + "def import_and_install(package, name=None):\n", + " try:\n", + " if name is None:\n", + " globals()[package] = __import__(package)\n", + " else:\n", + " globals()[name] = __import__(package)\n", + " except ImportError:\n", + " !pip install -q {package}\n", + " globals()[package] = __import__(package)\n", + "\n", + "# Usage\n", + "import_and_install('datasets')\n", + "import_and_install('numpy', 'np')\n", + "import_and_install('requests')\n", + "import_and_install('json')\n", + "import_and_install('naas')" + ] + }, + { + "cell_type": "markdown", + "id": "28bcbd55-f82c-4da3-8ee9-bee13686a20f", + "metadata": {}, + "source": [ + "### Inference API\n", + "\n", + "To run inference on huggingface models using their API, we need to extract the API token.\n", + "\n", + "#### Steps to get API token\n", + "- Create an account on [Hugging Face](https://huggingface.co)\n", + "- Log in, and click on profile icon (top right corner)\n", + "- Go to settings\n", + "- Click on [Access tokens](https://huggingface.co/settings/tokens)\n", + "- Now, create a new access token with name: `INFERENCE_API` and role: `read`\n", + "- Copy the generated token and paste it below\n", + "\n", + "\n", + "We will use a pretrained model specifically finetuned for token classification tasks on large corpora of data.\n", + "\n", + "In this demonstration, we will utilize the popular BERT-base model finetuned for NER tasks." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "82bce589-5972-4ce1-8080-be27f96314ef", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-18T04:57:11.766689Z", + "iopub.status.busy": "2023-11-18T04:57:11.766440Z", + "iopub.status.idle": "2023-11-18T04:57:12.148083Z", + "shell.execute_reply": "2023-11-18T04:57:12.147446Z", + "shell.execute_reply.started": "2023-11-18T04:57:11.766664Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "👌 Well done! Your Secret has been sent to production. \n", + "\n", + "PS: to remove the \"Secret\" feature, just replace .add by .delete\n" + ] + } + ], + "source": [ + "# Settting API token as an environment variable using naas module\n", + "\n", + "INFERENCE_API = \"ADD_YOUR_API_TOKEN_HERE\"\n", + "naas.secret.add(\"INFERENCE_API\", INFERENCE_API)\n", + "\n", + "API_TOKEN = naas.secret.get(\"INFERENCE_API\")" + ] + }, + { + "cell_type": "markdown", + "id": "8ef6851a-95f9-4da8-855f-b27ec4a30f5d", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "28a2db1e-bfdd-4b2a-aec5-3459e3e32ad2", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-18T05:16:58.600520Z", + "iopub.status.busy": "2023-11-18T05:16:58.600261Z", + "iopub.status.idle": "2023-11-18T05:16:58.610749Z", + "shell.execute_reply": "2023-11-18T05:16:58.609871Z", + "shell.execute_reply.started": "2023-11-18T05:16:58.600483Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def get_api_reponse(payload, model, API_KEY):\n", + " API_URL = f\"https://api-inference.huggingface.co/models/{model}\"\n", + " headers = {\"Authorization\": f\"Bearer {API_KEY}\"}\n", + " try:\n", + " response = requests.post(API_URL, headers=headers, json=payload)\n", + " if response.status_code == 200:\n", + " return response.json()\n", + " else:\n", + " print(f\"Error: {response.status_code}, {response.text}\")\n", + " except Exception as e:\n", + " return \"Error: \" + e" + ] + }, + { + "cell_type": "markdown", + "id": "8c3ceb80-fbf9-4de1-97d6-303e04394414", + "metadata": {}, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "1a1e7df8-a8d7-4003-a8ae-bd09c33f90d6", + "metadata": {}, + "source": [ + "Find the appropriate model id from huggingface [models](https://huggingface.co/models) and replace the model in the `MODEL` variable" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "2fb093bc-0e9a-495e-9ee0-787bc43533cb", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-18T05:17:26.350844Z", + "iopub.status.busy": "2023-11-18T05:17:26.350504Z", + "iopub.status.idle": "2023-11-18T05:17:26.691915Z", + "shell.execute_reply": "2023-11-18T05:17:26.691178Z", + "shell.execute_reply.started": "2023-11-18T05:17:26.350805Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'entity_group': 'PER', 'score': 0.9979527592658997, 'word': 'Sarah Jessica Parker', 'start': 11, 'end': 31}, {'entity_group': 'ORG', 'score': 0.9990966320037842, 'word': 'Apple Computers', 'start': 46, 'end': 61}]\n" + ] + } + ], + "source": [ + "input_ = {\"inputs\": \"My name is Sarah Jessica Parker and I work at Apple Computers.\", \n", + " \"options\":{\"wait_for_model\":True,\n", + " \"use_cache\":False}\n", + " }\n", + "MODEL = \"dslim/bert-base-NER\"\n", + "\n", + "response = get_api_reponse(payload=input_, model=MODEL, API_KEY=API_TOKEN)\n", + "print(response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}