Skip to content

Commit

Permalink
Examples for downloading data and for testing the model
Browse files Browse the repository at this point in the history
  • Loading branch information
haesleinhuepf committed Jul 29, 2024
1 parent ce8b00b commit 5614507
Show file tree
Hide file tree
Showing 3 changed files with 328 additions and 1 deletion.
169 changes: 168 additions & 1 deletion docs/71_fine_tuning_hf/hf_data_upload.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,176 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "68406b44-19d5-47bf-980e-a7d2909e4e37",
"metadata": {},
"outputs": [],
"source": [
"from datasets import load_dataset"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "fa8356ea-804f-4d9e-9730-e10ffc255ac9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dataset({\n",
" features: ['question', 'answer'],\n",
" num_rows: 130\n",
"})"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset2_name = \"haesleinhuepf/bio-image-analysis-qa\"\n",
"dataset2 = load_dataset(dataset2_name, split=\"all\")\n",
"dataset2"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "6bc10573-2d12-4842-97ea-b497e3784374",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>question</th>\n",
" <th>answer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>How can we calculate the average values along ...</td>\n",
" <td>\\nThis code imports the numpy library and crea...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>How can I write Python code to apply statistic...</td>\n",
" <td>\\nThe code uses the numpy library in Python, w...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>How can we obtain the precise shape (dimension...</td>\n",
" <td>\\nThis code reads an image file called \"blobs....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>How can we use indices in Python to crop image...</td>\n",
" <td>\\nThis code imports the necessary functions fr...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>How can we write Python code to crop an image ...</td>\n",
" <td>\\nThe code imports functions `imshow` and `imr...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>How can we use Python code to visualize our `l...</td>\n",
" <td>\\nThe code uses the `curtain` function from th...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>How can we open an image and label objects in ...</td>\n",
" <td>\\nThis code imports the necessary libraries an...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>How can we use Python to analyze the labeled e...</td>\n",
" <td>\\nThe code uses the skimage library's measure ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>What Python code can be used to create a label...</td>\n",
" <td>\\nThis code imports necessary libraries and fu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>129</th>\n",
" <td>Can you provide a Python code for creating nea...</td>\n",
" <td>\\nThis code uses the pyclesperanto_prototype l...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>130 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" question \\\n",
"0 How can we calculate the average values along ... \n",
"1 How can I write Python code to apply statistic... \n",
"2 How can we obtain the precise shape (dimension... \n",
"3 How can we use indices in Python to crop image... \n",
"4 How can we write Python code to crop an image ... \n",
".. ... \n",
"125 How can we use Python code to visualize our `l... \n",
"126 How can we open an image and label objects in ... \n",
"127 How can we use Python to analyze the labeled e... \n",
"128 What Python code can be used to create a label... \n",
"129 Can you provide a Python code for creating nea... \n",
"\n",
" answer \n",
"0 \\nThis code imports the numpy library and crea... \n",
"1 \\nThe code uses the numpy library in Python, w... \n",
"2 \\nThis code reads an image file called \"blobs.... \n",
"3 \\nThis code imports the necessary functions fr... \n",
"4 \\nThe code imports functions `imshow` and `imr... \n",
".. ... \n",
"125 \\nThe code uses the `curtain` function from th... \n",
"126 \\nThis code imports the necessary libraries an... \n",
"127 \\nThe code uses the skimage library's measure ... \n",
"128 \\nThis code imports necessary libraries and fu... \n",
"129 \\nThis code uses the pyclesperanto_prototype l... \n",
"\n",
"[130 rows x 2 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset2.to_pandas()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24ff7579-c67d-4242-8b72-4f41a71c9b4b",
"metadata": {},
"outputs": [],
"source": []
}
Expand Down
159 changes: 159 additions & 0 deletions docs/71_fine_tuning_hf/test_model.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "f1cd554b-2318-41d1-969f-46d7cafd1b40",
"metadata": {},
"source": [
"# Testing the model\n",
    "Here we test our fine-tuned model by sending it a prompt and displaying its generated response."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "be1df990-5cbf-4f3e-9059-9fae25f8d3ae",
"metadata": {},
"outputs": [],
"source": [
"def prompt_hf(request, model=\"haesleinhuepf/gemma-2b-it-bia-proof-of-concept2\"):\n",
" global prompt_hf\n",
" import transformers\n",
" import torch\n",
" \n",
" if prompt_hf._pipeline is None: \n",
" prompt_hf._pipeline = transformers.pipeline(\n",
" \"text-generation\", model=model, model_kwargs={\"torch_dtype\": torch.bfloat16}, device_map=\"auto\",\n",
" max_new_tokens=200\n",
" )\n",
" \n",
" return prompt_hf._pipeline(request)[0]['generated_text']\n",
"prompt_hf._pipeline = None"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "958d16ef-f3fa-412f-be38-76101d63a2e5",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ade3065e46914ab5b27eac86d175a10a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n",
"Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n",
"`config.hidden_activation` if you want to override this behaviour.\n",
"See https://github.com/huggingface/transformers/pull/29402 for more details.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "adc6bf6e815348649fd315678923a671",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some parameters are on the meta device device because they were offloaded to the cpu.\n",
"C:\\Users\\rober\\miniconda3\\envs\\genai-gpu\\Lib\\site-packages\\transformers\\models\\gemma\\modeling_gemma.py:482: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\cb\\pytorch_1000000000000\\work\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n",
" attn_output = torch.nn.functional.scaled_dot_product_attention(\n"
]
},
{
"data": {
"text/markdown": [
"Write Python code for cropping an image in X and Y to coordinates 10-20 and 30-50 respectively.\n",
"\n",
"```python\n",
"import cv2\n",
"\n",
"# Load the image\n",
"image = cv2.imread(\"image.jpg\")\n",
"\n",
"# Crop the image\n",
"cropped_image = image[10:20, 30:50]\n",
"\n",
"# Save the cropped image\n",
"cv2.imwrite(\"cropped_image.jpg\", cropped_image)\n",
"```\n",
"\n",
"**Explanation:**\n",
"\n",
"* `cv2.imread(\"image.jpg\")` loads the image from the file \"image.jpg\".\n",
"* `image[10:20, 30:50]` crops the image by specifying the coordinates of the top-left and bottom-right corners of the crop.\n",
"* `cv2.imwrite(\"cropped_image.jpg\", cropped_image)` saves the cropped image to the file \"cropped_image.jpg\".\n",
"\n",
"**Note:**\n",
"\n",
"* The `[10:20, 30:50]` coordinates represent the height"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from IPython.display import Markdown, display\n",
"result = prompt_hf(\"Write Python code for cropping an image in X and Y to coordinates 10-20 and 30-50\")\n",
"display(Markdown(result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0a04961-d213-47d2-a218-8eacd242a35d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
1 change: 1 addition & 0 deletions docs/_toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ parts:
chapters:
- file: 71_fine_tuning_hf/fine-tune-gemma.ipynb
- file: 71_fine_tuning_hf/merging_model.ipynb
- file: 71_fine_tuning_hf/test_model.ipynb
- file: 71_fine_tuning_hf/hf_data_upload.ipynb

- caption: Benchmarking
Expand Down

0 comments on commit 5614507

Please sign in to comment.