From 59c5bb6756cc2d5dc04fa009a7ff27780bd5b925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oph=C3=A9lie=20Le=20Mentec?= <17216799+ouphi@users.noreply.github.com> Date: Thu, 21 Sep 2023 16:02:13 +0200 Subject: [PATCH] Added sample to deploy online endpoint running inference (#2) * Added inference samples. * Use latest mlflow versions * Use latest ultralytics version. * quick cleanup. * cleanup. * Updated libraries version in notebook. * renamed inference environment folder. * Added infoo about request_timeout_ms. --- .gitignore | 5 +- README.md | 64 +++ azureml-environment/Dockerfile | 6 +- azureml/deployment.yaml | 17 + azureml/endpoint.yaml | 3 + deploy-endpoint.sh | 9 + deploy-local-endpoint.sh | 52 ++ inference-code/score.py | 18 + inference-environment/Dockerfile | 9 + inference-sample-request.json | 3 + instructions-az-cli.md | 6 +- instructions-python-sdk.ipynb | 824 +++++++++++++++---------------- 12 files changed, 597 insertions(+), 419 deletions(-) create mode 100644 azureml/deployment.yaml create mode 100644 azureml/endpoint.yaml create mode 100755 deploy-endpoint.sh create mode 100755 deploy-local-endpoint.sh create mode 100644 inference-code/score.py create mode 100644 inference-environment/Dockerfile create mode 100644 inference-sample-request.json diff --git a/.gitignore b/.gitignore index cad84f4..3a5e74f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ yolov8n.pt -coco128/* \ No newline at end of file +coco128/* +.vscode +.idea +__pycache__ \ No newline at end of file diff --git a/README.md b/README.md index 165ee50..24be320 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,67 @@ You can find the detailed instructions to train the yolov8 model with the az cli ## Azure machine learning python SDK [Here is a notebook](instructions-python-sdk.ipynb) showing how to train the yolov8 model with the python SDK. + +## Deploy model for inference + +### Register the model from the workspace UI +You can register the model resulting from a training job. +Go to your job Overview and select "Register model". +Select model of type Unspecified type enable "Show all default outputs" > and select best.pt. +(Note that your training environment needs azureml-mlflow==1.52.0 and mlflow==2.4.2 to enable mlflow logging and being able to retrieve the model) + +### Create the deployment +In `azureml/deployment.yaml`, specify your model + +You can either specify a registered model. + +```yaml +model: azureml:: +``` + +Or specify the relative path of a local .pt file: + +```yaml +model: + path: +``` + +Note that you might need to increase the request_timeout_ms by specifying it in the deployment.yaml if running your +inference takes time +### Deploy your model for inference + +To deploy your endpoint in your azureml workspace: + +Configure your default resource group and azureml workspace: + +```bash +az configure --defaults group=$YOUR_RESOURCE_GROUP workspace=$YOUR_AZ_ML_WORKSPACE +``` + +```bash +./deploy-endpoint.sh +``` + +Note your endpoint name and score uri (you can retrieve them from the azure workspace). + +### Test the endpoint and allocate traffic + +To be able invoke our endpoint with an http client, you need to allocate traffic to your endpoint. (For more information [see this doc](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-safely-rollout-online-endpoints?view=azureml-api-2&tabs=azure-cli#confirm-your-existing-deployment)) + +```bash +az ml online-endpoint show -n $ENDPOINT_NAME --query traffic +``` + +You can see that 0% is allocated to the blue deployment, so let's allocate 100% traffic to our unique blue deployment: + +```bash +az ml online-endpoint update --name $ENDPOINT_NAME --traffic "blue=100" +``` + +Now you should be able to call your endpoint with curl. +You need to retrieve your endpoint key from the azure ml workspace in Endpoints > Consume > Basic consumption info. + +```bash +ENDPOINT_KEY=$YOUR_ENDPOINT_KEY +curl --request POST "$SCORING_URI" --header "Authorization: Bearer $ENDPOINT_KEY" --header 'Content-Type: application/json' --data '{"image_url": "https://ultralytics.com/images/bus.jpg"}' +``` diff --git a/azureml-environment/Dockerfile b/azureml-environment/Dockerfile index 47b382d..eb37cc7 100644 --- a/azureml-environment/Dockerfile +++ b/azureml-environment/Dockerfile @@ -13,6 +13,6 @@ RUN apt install --no-install-recommends -y gcc git zip curl htop libgl1-mesa-glx # https://security.snyk.io/vuln/SNYK-UBUNTU1804-OPENSSL-3314796 RUN apt upgrade --no-install-recommends -y openssl tar -RUN pip install ultralytics==8.0.88 -RUN pip install azureml-mlflow==1.50.0 -RUN pip install mlflow==2.2.2 +RUN pip install ultralytics==8.0.180 +RUN pip install azureml-mlflow==1.52.0 +RUN pip install mlflow==2.4.2 diff --git a/azureml/deployment.yaml b/azureml/deployment.yaml new file mode 100644 index 0000000..84bcc97 --- /dev/null +++ b/azureml/deployment.yaml @@ -0,0 +1,17 @@ +$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json +name: yolodeployment +endpoint_name: yolovendpoint +model: + #azureml::1 + #path: +code_configuration: + code: ../inference-code + scoring_script: score.py +environment: + build: + path: ../inference-environment + dockerfile_path: Dockerfile +instance_type: Standard_DS3_v2 +instance_count: 1 +# Note that you might need to increase the request_timeout_ms if running the inference takes time. +# request_timeout_ms: 10000 \ No newline at end of file diff --git a/azureml/endpoint.yaml b/azureml/endpoint.yaml new file mode 100644 index 0000000..fce7fa1 --- /dev/null +++ b/azureml/endpoint.yaml @@ -0,0 +1,3 @@ +$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json +name: my-endpoint +auth_mode: key diff --git a/deploy-endpoint.sh b/deploy-endpoint.sh new file mode 100755 index 0000000..349430d --- /dev/null +++ b/deploy-endpoint.sh @@ -0,0 +1,9 @@ +set -ex + +export ENDPOINT_NAME=endpt-`echo $RANDOM` +az ml online-endpoint create -n $ENDPOINT_NAME -f azureml/endpoint.yaml +az ml online-deployment create -n blue --endpoint $ENDPOINT_NAME -f azureml/deployment.yaml +az ml online-endpoint show -n $ENDPOINT_NAME +SCORING_URI=$(az ml online-endpoint show -n $ENDPOINT_NAME -o tsv --query scoring_uri) +echo "Endpoint name: $ENDPOINT_NAME" +echo "Scoring uri: $SCORING_URI" diff --git a/deploy-local-endpoint.sh b/deploy-local-endpoint.sh new file mode 100755 index 0000000..f5f374b --- /dev/null +++ b/deploy-local-endpoint.sh @@ -0,0 +1,52 @@ +set -ex + +export ENDPOINT_NAME=endpt-`echo $RANDOM` +az ml online-endpoint create --local -n $ENDPOINT_NAME -f azureml/endpoint.yaml + +# +az ml online-deployment create --local -n blue --endpoint $ENDPOINT_NAME -f azureml/deployment.yaml +# + +# +az ml online-endpoint show -n $ENDPOINT_NAME --local +# + +# check if create was successful +endpoint_status=`az ml online-endpoint show --local --name $ENDPOINT_NAME --query "provisioning_state" -o tsv` +echo $endpoint_status +if [[ $endpoint_status == "Succeeded" ]] +then + echo "Endpoint created successfully" +else + echo "Endpoint creation failed" + exit 1 +fi + +deploy_status=`az ml online-deployment show --local --name blue --endpoint $ENDPOINT_NAME --query "provisioning_state" -o tsv` +echo $deploy_status +if [[ $deploy_status == "Succeeded" ]] +then + echo "Deployment completed successfully" +else + echo "Deployment failed" + exit 1 +fi + +# +az ml online-endpoint invoke --local --name $ENDPOINT_NAME --request-file inference-sample-request.json +# + +# +SCORING_URI=$(az ml online-endpoint show --local -n $ENDPOINT_NAME -o tsv --query scoring_uri) + +curl --request POST "$SCORING_URI" --header 'Content-Type: application/json' --data @inference-sample-request.json + +# +#az ml online-deployment get-logs --local -n blue --endpoint $ENDPOINT_NAME +# + +curl -X POST -H "Content-Type: application/json" -d '{"image_url": "https://ultralytics.com/images/bus.jpg"}' $SCORING_URI + +# +#az ml online-endpoint delete --local --name $ENDPOINT_NAME --yes +# diff --git a/inference-code/score.py b/inference-code/score.py new file mode 100644 index 0000000..ba5ca22 --- /dev/null +++ b/inference-code/score.py @@ -0,0 +1,18 @@ +import os +import json +from ultralytics import YOLO + +def init(): + global model + model_path = os.path.join( + os.getenv("AZUREML_MODEL_DIR"), "best.pt" + ) + model = YOLO(model_path) + + +def run(raw_data): + image_url = json.loads(raw_data)["image_url"] + results = model(image_url) + result = results[0] + serialized_result = json.loads(result.tojson()) + return serialized_result diff --git a/inference-environment/Dockerfile b/inference-environment/Dockerfile new file mode 100644 index 0000000..8869cfc --- /dev/null +++ b/inference-environment/Dockerfile @@ -0,0 +1,9 @@ +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest + +ENV DEBIAN_FRONTEND noninteractive +RUN apt update +RUN TZ=Etc/UTC apt install -y tzdata +RUN apt install --no-install-recommends -y gcc git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev gnupg g++ +RUN apt upgrade --no-install-recommends -y openssl tar +RUN pip install azureml-inference-server-http +RUN pip install ultralytics==8.0.180 diff --git a/inference-sample-request.json b/inference-sample-request.json new file mode 100644 index 0000000..6fab686 --- /dev/null +++ b/inference-sample-request.json @@ -0,0 +1,3 @@ +{ + "image_url": "https://ultralytics.com/images/bus.jpg" +} diff --git a/instructions-az-cli.md b/instructions-az-cli.md index 8f8818e..e694f8f 100644 --- a/instructions-az-cli.md +++ b/instructions-az-cli.md @@ -47,9 +47,9 @@ RUN apt install --no-install-recommends -y gcc git zip curl htop libgl1-mesa-glx # https://security.snyk.io/vuln/SNYK-UBUNTU1804-OPENSSL-3314796 RUN apt upgrade --no-install-recommends -y openssl tar -RUN pip install ultralytics==8.0.88 -RUN pip install azureml-mlflow==1.50.0 -RUN pip install mlflow==2.2.2 +RUN pip install ultralytics==8.0.132 +RUN pip install azureml-mlflow==1.52.0 +RUN pip install mlflow==2.4.2 ``` Note that Ultralytics provides [Dockerfiles for different platform](https://github.com/ultralytics/ultralytics/tree/main/docker). Here we used the same base image and installed the same linux dependencies than the [amd64 Dockerfile](https://github.com/ultralytics/ultralytics/blob/main/docker/Dockerfile), but we installed the ultralytics package with pip install to control the version we install and make sure the package version is deterministic. To track hyperparameters and metrics in AzureML, we installed [mlflow](https://pypi.org/project/mlflow/) and [azureml-mlflow](https://pypi.org/project/azureml-mlflow/). This enables us to evaluate our model performance easily and compare models from various training runs in AzureML studio. diff --git a/instructions-python-sdk.ipynb b/instructions-python-sdk.ipynb index 1ac51dc..1eb102c 100644 --- a/instructions-python-sdk.ipynb +++ b/instructions-python-sdk.ipynb @@ -1,416 +1,416 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# How to train the yolov8 model with Azure Machine learning - Python SDK\n", - "Azure Machine Learning provides a comprehensive solution for managing the entire lifecycle of machine learning models. In this tutorial, we'll explore how to use AzureML to train and continuously improve an open source model. Here we will train the [Yolov8 model](https://github.com/ultralytics/ultralytics) object detection model developed by Ultralytics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1682583614932 - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential\n", - "\n", - "# Let's login configure your workspace and resource group.\n", - "credential = DefaultAzureCredential()\n", - "\n", - "# Get a handle to the workspace. You can find the info on the workspace tab on ml.azure.com\n", - "ml_client = MLClient(\n", - " credential=credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"your-azureml-workspace-name\",\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create an AzureML environment\n", - "We need to create an [AzureML environment](https://learn.microsoft.com/azure/machine-learning/how-to-manage-environments-v2) with all the required dependencies to run our training.\n", - "\n", - "We will create a folder `azureml-environment`, it will contain the docker-context to build the environment. In this folder, we will add a Dockerfile with the required dependencies to run our training:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "%%bash\n", - "mkdir azureml-environment\n", - "echo \"\"\"\n", - "FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime\n", - "\n", - "# Downloads to user config dir\n", - "ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/\n", - "\n", - "# Install linux packages\n", - "ENV DEBIAN_FRONTEND noninteractive\n", - "RUN apt update\n", - "RUN TZ=Etc/UTC apt install -y tzdata\n", - "RUN apt install --no-install-recommends -y gcc git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev gnupg g++\n", - "\n", - "# Security updates\n", - "# https://security.snyk.io/vuln/SNYK-UBUNTU1804-OPENSSL-3314796\n", - "RUN apt upgrade --no-install-recommends -y openssl tar\n", - "\n", - "RUN pip install ultralytics==8.0.83\n", - "RUN pip install azureml-mlflow==1.50.0\n", - "RUN pip install mlflow==2.2.2\n", - "\"\"\" > azureml-environment/Dockerfile\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that Ultralytics provides [Dockerfiles for different platform](https://github.com/ultralytics/ultralytics/tree/main/docker). Here we used the same base image and installed the same linux dependencies than the [amd64 Dockerfile](https://github.com/ultralytics/ultralytics/blob/main/docker/Dockerfile), but we installed the ultralytics package with pip install to control the version we install and make sure the package version is deterministic. To track hyperparameters and metrics in AzureML, we installed [mlflow](https://pypi.org/project/mlflow/) and [azureml-mlflow](https://pypi.org/project/azureml-mlflow/). This enables us to evaluate our model performance easily and compare models from various training runs in AzureML studio.\n", - "Now let's create our environment:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1682583690659 - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Environment, BuildContext\n", - "\n", - "env_docker_context = Environment(\n", - " build=BuildContext(path=\"azureml-environment\"),\n", - " name=\"yolov8-environment\",\n", - " description=\"Environment created from a Docker context.\",\n", - ")\n", - "ml_client.environments.create_or_update(env_docker_context)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create an AzureML compute cluster\n", - "We need a compute instance from where we can run the training. We will create a [compute cluster](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-attach-compute-cluster?view=azureml-api-2&tabs=python#what-is-a-compute-cluster) that auto-scales from 0 to 2 active nodes.\n", - "We will make sure that:\n", - "\n", - "- Idling nodes scale down after 2 minutes of inactivity\n", - "- The minimum number of running nodes is 0, to avoid the cost of idling nodes.\n", - "\n", - "You can find [more information about clusters auto-scaling here](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-optimize-cost?view=azureml-api-2#configure-training-clusters-for-autoscaling)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1682583721430 - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import AmlCompute\n", - "\n", - "cluster = AmlCompute(\n", - " name=\"cluster-with-1-k80-gpu\",\n", - " type=\"amlcompute\",\n", - " size=\"Standard_NC6\",\n", - " location=\"westeurope\",\n", - " min_instances=0,\n", - " max_instances=2,\n", - " idle_time_before_scale_down=120,\n", - ")\n", - "ml_client.begin_create_or_update(cluster).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create an AzureML dataset\n", - "For this tutorial we will use the [coco128 dataset](https://www.kaggle.com/ultralytics/coco128). We will create an [AzureML data asset](https://learn.microsoft.com/azure/machine-learning/how-to-create-data-assets) to bookmark our dataset and easily use the dataset for various trainings.\n", - "\n", - "Let's download our training dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "# For this tutorial we will use the coco128 dataset: https://www.kaggle.com/ultralytics/coco128. We will create an AzureML data asset to bookmark our dataset and easily use the dataset for various trainings.\n", - "wget https://ultralytics.com/assets/coco128.zip\n", - "unzip coco128.zip" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1682583756911 - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Data\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# Create AzureML dataset\n", - "\n", - "my_data = Data(\n", - " path=\"coco128\",\n", - " type=AssetTypes.URI_FOLDER,\n", - " description=\"Coco 128 dataset\",\n", - " name=\"coco128\"\n", - ")\n", - "\n", - "ml_client.data.create_or_update(my_data)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here we specified the local path of the dataset, which means that the dataset will be uploaded from your local to AzureML. But note that AzureML dataset supports [several type of paths](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-data-assets?view=azureml-api-2&tabs=cli#supported-paths), for example a path on Azure storage. Your local dataset will be uploaded to AzureML. Now your dataset name should be `azureml:coco128:1`. You can see your dataset in AzureML studio in Data > Data asset. Note that if you create a dataset with the same name several time, it will create several versions of your dataset." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Register a pre-trained model\n", - "We will train a pre-trained model. Note that you can find the [yolov8 pre-trained models here](https://github.com/ultralytics/ultralytics/blob/main/README.md#models).\n", - "\n", - "Let's download the yolov8n.pt model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "# Download the yolov8 model\n", - "wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1682583759832 - } - }, - "outputs": [], - "source": [ - "# Register model\n", - "from azure.ai.ml.entities import Model\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "file_model = Model(\n", - " path=\"yolov8n.pt\",\n", - " type=AssetTypes.CUSTOM_MODEL,\n", - " name=\"yolov8n\",\n", - " description=\"yolov8n model.\",\n", - ")\n", - "ml_client.models.create_or_update(file_model)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training code\n", - "We will create a `training-code` folder containing the required files to run our training.\n", - "I want to show you how you can create your custom dataset definition. So we will download the [coco128.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco128.yaml) and call it custom-coco128.yaml.\n", - "We have to put this file in the `training-code` folder to make sure it is available when running our training:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "mkdir training-code\n", - "wget https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/datasets/coco128.yaml -O training-code/custom-coco128.yaml" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We want to ensure that the AzureML job uses our dataset coco128, rather than downloading it during the job, so let's remove the last line of the `custom-coco128.yaml`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "sed -i \"s|download:.*$||\" training-code/custom-coco128.yaml" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run the training\n", - "\n", - "An AzureML job execute a task against a compute target. We will create an AzureMl job that executes the yolov8 training against the compute cluster we created earlier." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1682583824313 - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import command\n", - "from azure.ai.ml import Input\n", - "\n", - "job = command(\n", - " inputs=dict(\n", - " training_data=Input(\n", - " type=\"uri_folder\",\n", - " path=\"azureml:coco128:1\",\n", - " ),\n", - " model_to_train=Input(\n", - " type=\"custom_model\",\n", - " path=\"azureml:yolov8n:1\"\n", - " )\n", - " ),\n", - " code=\"training-code\",\n", - " command=\"\"\"\n", - " sed -i \"s|path:.*$|path: ${{ inputs.training_data }}|\" custom-coco128.yaml &&\n", - " yolo task=detect train data=custom-coco128.yaml model=${{ inputs.model_to_train }} epochs=3 project=yolov8-experiment name=experiment\n", - " \"\"\",\n", - " environment=\"azureml:yolov8-environment:1\",\n", - " compute=\"cluster-with-1-k80-gpu\",\n", - " display_name=\"yolov8-experiment\",\n", - " experiment_name=\"yolov8-experiment\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's have a closer look to this job definition.\n", - "\n", - "You can see that we defined an input `training_data`, this is our coco dataset.\n", - "AzureML will mount or download this dataset, and when using `${{ inputs.training_data }}` in the command, AzureML will take care of resolving the filesystem path value.\n", - "\n", - "We will train our model with the following command:\n", - "\n", - "```bash\n", - "yolo task=detect train data=custom-coco128.yaml model=${{ inputs.model_to_train }} epochs=3 project=yolov8-experiment name=experiment\n", - "```\n", - "\n", - "Here we hard-coded `epochs=3`. This value could be passed as an input parameter.\n", - "You can look at the [ultralytics documentation](https://github.com/ultralytics/ultralytics/blob/main/docs/usage/cfg.md#train) to get more details about each settings.\n", - "\n", - "In this command we pass `data=custom-coco128.yaml`. Our dataset definition `custom-coco128.yaml` should contain a setting called path, that represents the dataset root dir. For now its value is:\n", - "\n", - "```yaml\n", - "path: ../datasets/coco128\n", - "```\n", - "\n", - "We want to change the path value to be the path of our AzureML dataset coco128. \n", - "In AzureML jobs, [job inputs datasets can be accessed by mounting or downloading](https://learn.microsoft.com/en-us/azure/machine-learning/reference-yaml-job-command?view=azureml-api-2#job-inputs) them. When you use ${{ inputs.training_data }} in a command, AzureML resolves the filesystem path of the dataset. However, it is not guaranteed that the filesystem path is consistent between different job runs, so we can't hardcode the path in the yaml file.\n", - "To work around this, you can dynamically edit the path in the AzureML job, just before running the training.\n", - "That's why we added the following sed command to replace `path: ` by `path: `.\n", - "\n", - "```bash\n", - "sed -i \"s|path:.*$|path: ${{ inputs.training_data }}|\" coco128.yaml\n", - "```\n", - "\n", - "We also defined a `model_to_train`. This is the pre-trained model that we will start the training from.\n", - "\n", - "Now let's submit the job:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ml_client.create_or_update(job)" - ] + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to train the yolov8 model with Azure Machine learning - Python SDK\n", + "Azure Machine Learning provides a comprehensive solution for managing the entire lifecycle of machine learning models. In this tutorial, we'll explore how to use AzureML to train and continuously improve an open source model. Here we will train the [Yolov8 model](https://github.com/ultralytics/ultralytics) object detection model developed by Ultralytics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1682583614932 + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import DefaultAzureCredential\n", + "\n", + "# Let's login configure your workspace and resource group.\n", + "credential = DefaultAzureCredential()\n", + "\n", + "# Get a handle to the workspace. You can find the info on the workspace tab on ml.azure.com\n", + "ml_client = MLClient(\n", + " credential=credential,\n", + " subscription_id=\"\",\n", + " resource_group_name=\"\",\n", + " workspace_name=\"your-azureml-workspace-name\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an AzureML environment\n", + "We need to create an [AzureML environment](https://learn.microsoft.com/azure/machine-learning/how-to-manage-environments-v2) with all the required dependencies to run our training.\n", + "\n", + "We will create a folder `azureml-environment`, it will contain the docker-context to build the environment. In this folder, we will add a Dockerfile with the required dependencies to run our training:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "%%bash\n", + "mkdir azureml-environment\n", + "echo \"\"\"\n", + "FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime\n", + "\n", + "# Downloads to user config dir\n", + "ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/\n", + "\n", + "# Install linux packages\n", + "ENV DEBIAN_FRONTEND noninteractive\n", + "RUN apt update\n", + "RUN TZ=Etc/UTC apt install -y tzdata\n", + "RUN apt install --no-install-recommends -y gcc git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev gnupg g++\n", + "\n", + "# Security updates\n", + "# https://security.snyk.io/vuln/SNYK-UBUNTU1804-OPENSSL-3314796\n", + "RUN apt upgrade --no-install-recommends -y openssl tar\n", + "\n", + "RUN pip install ultralytics==8.0.180\n", + "RUN pip install azureml-mlflow==1.52.0\n", + "RUN pip install mlflow==2.4.2\n", + "\"\"\" > azureml-environment/Dockerfile\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that Ultralytics provides [Dockerfiles for different platform](https://github.com/ultralytics/ultralytics/tree/main/docker). Here we used the same base image and installed the same linux dependencies than the [amd64 Dockerfile](https://github.com/ultralytics/ultralytics/blob/main/docker/Dockerfile), but we installed the ultralytics package with pip install to control the version we install and make sure the package version is deterministic. To track hyperparameters and metrics in AzureML, we installed [mlflow](https://pypi.org/project/mlflow/) and [azureml-mlflow](https://pypi.org/project/azureml-mlflow/). This enables us to evaluate our model performance easily and compare models from various training runs in AzureML studio.\n", + "Now let's create our environment:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1682583690659 + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Environment, BuildContext\n", + "\n", + "env_docker_context = Environment(\n", + " build=BuildContext(path=\"azureml-environment\"),\n", + " name=\"yolov8-environment\",\n", + " description=\"Environment created from a Docker context.\",\n", + ")\n", + "ml_client.environments.create_or_update(env_docker_context)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an AzureML compute cluster\n", + "We need a compute instance from where we can run the training. We will create a [compute cluster](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-attach-compute-cluster?view=azureml-api-2&tabs=python#what-is-a-compute-cluster) that auto-scales from 0 to 2 active nodes.\n", + "We will make sure that:\n", + "\n", + "- Idling nodes scale down after 2 minutes of inactivity\n", + "- The minimum number of running nodes is 0, to avoid the cost of idling nodes.\n", + "\n", + "You can find [more information about clusters auto-scaling here](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-optimize-cost?view=azureml-api-2#configure-training-clusters-for-autoscaling)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1682583721430 + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import AmlCompute\n", + "\n", + "cluster = AmlCompute(\n", + " name=\"cluster-with-1-k80-gpu\",\n", + " type=\"amlcompute\",\n", + " size=\"Standard_NC6\",\n", + " location=\"westeurope\",\n", + " min_instances=0,\n", + " max_instances=2,\n", + " idle_time_before_scale_down=120,\n", + ")\n", + "ml_client.begin_create_or_update(cluster).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an AzureML dataset\n", + "For this tutorial we will use the [coco128 dataset](https://www.kaggle.com/ultralytics/coco128). We will create an [AzureML data asset](https://learn.microsoft.com/azure/machine-learning/how-to-create-data-assets) to bookmark our dataset and easily use the dataset for various trainings.\n", + "\n", + "Let's download our training dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "# For this tutorial we will use the coco128 dataset: https://www.kaggle.com/ultralytics/coco128. We will create an AzureML data asset to bookmark our dataset and easily use the dataset for various trainings.\n", + "wget https://ultralytics.com/assets/coco128.zip\n", + "unzip coco128.zip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1682583756911 } - ], - "metadata": { - "kernel_info": { - "name": "python310-sdkv2" - }, - "kernelspec": { - "display_name": "Python 3.10 - SDK v2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - }, - "microsoft": { - "ms_spell_check": { - "ms_spell_check_language": "en" - } - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - }, - "orig_nbformat": 4 + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Data\n", + "from azure.ai.ml.constants import AssetTypes\n", + "\n", + "# Create AzureML dataset\n", + "\n", + "my_data = Data(\n", + " path=\"coco128\",\n", + " type=AssetTypes.URI_FOLDER,\n", + " description=\"Coco 128 dataset\",\n", + " name=\"coco128\"\n", + ")\n", + "\n", + "ml_client.data.create_or_update(my_data)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we specified the local path of the dataset, which means that the dataset will be uploaded from your local to AzureML. But note that AzureML dataset supports [several type of paths](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-data-assets?view=azureml-api-2&tabs=cli#supported-paths), for example a path on Azure storage. Your local dataset will be uploaded to AzureML. Now your dataset name should be `azureml:coco128:1`. You can see your dataset in AzureML studio in Data > Data asset. Note that if you create a dataset with the same name several time, it will create several versions of your dataset." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register a pre-trained model\n", + "We will train a pre-trained model. Note that you can find the [yolov8 pre-trained models here](https://github.com/ultralytics/ultralytics/blob/main/README.md#models).\n", + "\n", + "Let's download the yolov8n.pt model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "# Download the yolov8 model\n", + "wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1682583759832 + } + }, + "outputs": [], + "source": [ + "# Register model\n", + "from azure.ai.ml.entities import Model\n", + "from azure.ai.ml.constants import AssetTypes\n", + "\n", + "file_model = Model(\n", + " path=\"yolov8n.pt\",\n", + " type=AssetTypes.CUSTOM_MODEL,\n", + " name=\"yolov8n\",\n", + " description=\"yolov8n model.\",\n", + ")\n", + "ml_client.models.create_or_update(file_model)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training code\n", + "We will create a `training-code` folder containing the required files to run our training.\n", + "I want to show you how you can create your custom dataset definition. So we will download the [coco128.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco128.yaml) and call it custom-coco128.yaml.\n", + "We have to put this file in the `training-code` folder to make sure it is available when running our training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "mkdir training-code\n", + "wget https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/datasets/coco128.yaml -O training-code/custom-coco128.yaml" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We want to ensure that the AzureML job uses our dataset coco128, rather than downloading it during the job, so let's remove the last line of the `custom-coco128.yaml`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "sed -i \"s|download:.*$||\" training-code/custom-coco128.yaml" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run the training\n", + "\n", + "An AzureML job execute a task against a compute target. We will create an AzureMl job that executes the yolov8 training against the compute cluster we created earlier." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1682583824313 + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml import command\n", + "from azure.ai.ml import Input\n", + "\n", + "job = command(\n", + " inputs=dict(\n", + " training_data=Input(\n", + " type=\"uri_folder\",\n", + " path=\"azureml:coco128:1\",\n", + " ),\n", + " model_to_train=Input(\n", + " type=\"custom_model\",\n", + " path=\"azureml:yolov8n:1\"\n", + " )\n", + " ),\n", + " code=\"training-code\",\n", + " command=\"\"\"\n", + " sed -i \"s|path:.*$|path: ${{ inputs.training_data }}|\" custom-coco128.yaml &&\n", + " yolo task=detect train data=custom-coco128.yaml model=${{ inputs.model_to_train }} epochs=3 project=yolov8-experiment name=experiment\n", + " \"\"\",\n", + " environment=\"azureml:yolov8-environment:1\",\n", + " compute=\"cluster-with-1-k80-gpu\",\n", + " display_name=\"yolov8-experiment\",\n", + " experiment_name=\"yolov8-experiment\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's have a closer look to this job definition.\n", + "\n", + "You can see that we defined an input `training_data`, this is our coco dataset.\n", + "AzureML will mount or download this dataset, and when using `${{ inputs.training_data }}` in the command, AzureML will take care of resolving the filesystem path value.\n", + "\n", + "We will train our model with the following command:\n", + "\n", + "```bash\n", + "yolo task=detect train data=custom-coco128.yaml model=${{ inputs.model_to_train }} epochs=3 project=yolov8-experiment name=experiment\n", + "```\n", + "\n", + "Here we hard-coded `epochs=3`. This value could be passed as an input parameter.\n", + "You can look at the [ultralytics documentation](https://github.com/ultralytics/ultralytics/blob/main/docs/usage/cfg.md#train) to get more details about each settings.\n", + "\n", + "In this command we pass `data=custom-coco128.yaml`. Our dataset definition `custom-coco128.yaml` should contain a setting called path, that represents the dataset root dir. For now its value is:\n", + "\n", + "```yaml\n", + "path: ../datasets/coco128\n", + "```\n", + "\n", + "We want to change the path value to be the path of our AzureML dataset coco128. \n", + "In AzureML jobs, [job inputs datasets can be accessed by mounting or downloading](https://learn.microsoft.com/en-us/azure/machine-learning/reference-yaml-job-command?view=azureml-api-2#job-inputs) them. When you use ${{ inputs.training_data }} in a command, AzureML resolves the filesystem path of the dataset. However, it is not guaranteed that the filesystem path is consistent between different job runs, so we can't hardcode the path in the yaml file.\n", + "To work around this, you can dynamically edit the path in the AzureML job, just before running the training.\n", + "That's why we added the following sed command to replace `path: ` by `path: `.\n", + "\n", + "```bash\n", + "sed -i \"s|path:.*$|path: ${{ inputs.training_data }}|\" coco128.yaml\n", + "```\n", + "\n", + "We also defined a `model_to_train`. This is the pre-trained model that we will start the training from.\n", + "\n", + "Now let's submit the job:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ml_client.create_or_update(job)" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python310-sdkv2" + }, + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + }, + "microsoft": { + "ms_spell_check": { + "ms_spell_check_language": "en" + } + }, + "nteract": { + "version": "nteract-front-end@1.0.0" }, - "nbformat": 4, - "nbformat_minor": 2 + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 }