Add Deploy to AzureML job #38

Draft · wants to merge 8 commits into base: main
14 changes: 14 additions & 0 deletions jobs/deploy_to_azureml/Dockerfile.wandb
# syntax=docker/dockerfile:1.4

FROM python:3.9-slim
# build-essential already includes gcc; one cached layer, cleaned apt lists
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /launch

COPY --link requirements.txt ./
RUN pip install -r requirements.txt

COPY --link job.py ./
ENTRYPOINT ["python3", "job.py"]
60 changes: 60 additions & 0 deletions jobs/deploy_to_azureml/README.md
# AzureML Online Endpoints Deploy Job

Deploy a model from W&B Artifacts to AzureML Online Endpoints.

This job accepts a model artifact from W&B and deploys it to an AzureML Online Endpoint. It infers the model type from the artifact, auto-generates the required `main.py`/`score.py` files, and spins up both the Endpoint (if it doesn't already exist) and the Deployment. Each request to the endpoint is also logged back to W&B, capturing the inputs, outputs, and any error messages.
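For intuition, here is a minimal sketch of what the generated request handler could look like. The function shape follows AzureML's scoring-script convention (`run(raw_data)`); the echo "model" and the logging comment are placeholders, not the job's actual generated code:

```python
import json


def run(raw_data: str) -> dict:
    """AzureML scoring entry point: parse the request body, predict, return JSON-able output."""
    payload = json.loads(raw_data)
    inputs = payload["data"]
    # The real generated score.py runs the loaded model here; this stub echoes the input.
    outputs = inputs
    # The real generated score.py also logs inputs, outputs, and any errors back to W&B.
    return {"predictions": outputs}
```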

## Prerequisites

### Azure

1. Ensure your AzureML workspace is set up. If you haven't already, [create an AzureML workspace](https://learn.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources?view=azureml-api-2).
2. Ensure the client credentials you're passing in have the basic AzureML permissions to create and deploy endpoints. The `Contributor` role is sufficient, though you may want tighter permissions. (See "Note on permissions" below if you run into other auth issues.)
```shell
az role assignment create --assignee $AZURE_CLIENT_ID --role Contributor --scope /subscriptions/$AZURE_SUBSCRIPTION_ID
```

### W&B

1. The job requires a supported model saved as an artifact in W&B. Currently, the job supports:

1. **Tensorflow** - We assume SavedModel format. The artifact should look like a SavedModel directory.
2. **PyTorch** - We look for any `.pt` or `.pth` files and load the first one as the model.
3. **ONNX** - We look for any `.onnx` files and load the first one as the model.

2. You will also need to [set up a launch queue](https://docs.wandb.ai/guides/launch/setup-launch) with an env file that contains `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`, and `AZURE_TENANT_ID`.
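The detection rules above can be sketched as a small helper. This is an illustrative assumption about how the job might scan a downloaded artifact directory, not its actual implementation:

```python
from pathlib import Path


def infer_model_type(artifact_dir: str) -> str:
    """Guess the model framework from files present in a W&B artifact directory."""
    root = Path(artifact_dir)
    if any(root.rglob("saved_model.pb")):  # marker file of a TensorFlow SavedModel
        return "tensorflow"
    if any(root.rglob("*.pt")) or any(root.rglob("*.pth")):  # first PyTorch checkpoint wins
        return "pytorch"
    if any(root.rglob("*.onnx")):
        return "onnx"
    raise ValueError(f"No supported model found in {artifact_dir}")
```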

## Usage

1. If the container doesn't already exist, build it:

```shell
docker buildx build -t $IMAGE_NAME:$IMAGE_TAG -f Dockerfile.wandb .
```

2. Add the job to your launch queue by passing a valid launch config:

```shell
wandb launch -d $IMAGE_NAME:$IMAGE_TAG -q $YOUR_QUEUE -p $YOUR_PROJECT -c $YOUR_CONFIG
```

For sample configs, see `configs/`. You can convert one of the YAML configs to launch-config JSON with `yq` and `jq`. You'll need to update the Azure settings to your own!

```
YOUR_CONFIG="example.json" \
EXAMPLE_CONFIG_YML="configs/pytorch.yml" \
temp=$(yq eval -o=json $EXAMPLE_CONFIG_YML > $YOUR_CONFIG) \
echo $temp | jq '{overrides: {run_config: .}}' > "$YOUR_CONFIG"
```

If you want to see what the job would do without actually deploying, set `dry_run: true` in the config.
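For reference, the converted `$YOUR_CONFIG` has this shape; `overrides.run_config` comes from the `jq` filter above, while the keys inside `run_config` depend on your chosen `configs/` example (only `dry_run` is shown here):

```json
{
  "overrides": {
    "run_config": {
      "dry_run": true
    }
  }
}
```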

3. If the Launch job already exists in W&B, you can configure and run the job from the W&B UI or CLI.

## Note on permissions

1. The generated `main.py`/`score.py` uses `ManagedIdentityCredential` to authenticate with AzureML. The identity needs read access to the key vault named by `AZURE_KEYVAULT_NAME`; if access isn't granted, the endpoint will be created but the deployment will fail. You can grant access to the key vault with the following commands:
```shell
az role assignment create --assignee $ENDPOINT_APP_ID --role "Key Vault Secrets User" --scope /$KEYVAULT_SCOPE
az keyvault set-policy --name $AZURE_KEYVAULT_NAME --spn $ENDPOINT_APP_ID --secret-permissions get list --key-permissions get list
```
129 changes: 129 additions & 0 deletions jobs/deploy_to_azureml/demo.ipynb
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Testing endpoints\n",
"\n",
"1. Fill out the Azure-specific `subscription_id`, `resource_group` and `workspace` variables in the cell below.\n",
"2. Fill out the `endpoint_name` and `deployment_name` you want to test."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azure.ai.ml import MLClient\n",
"from azure.identity import DefaultAzureCredential\n",
"\n",
"subscription_id = \"...\"\n",
"resource_group = \"...\"\n",
"workspace = \"...\"\n",
"\n",
"\n",
"ml_client = MLClient(\n",
" DefaultAzureCredential(), subscription_id, resource_group, workspace\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test PyTorch"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"\n",
"endpoint_name = \"...\"\n",
"deployment_name = \"...\"\n",
"\n",
"t = torch.randn(1, 3, 224, 224) # Replace with your model input shape\n",
"with open(\"sample-request.json\", \"w\") as f:\n",
" f.write(f'{{\"data\": {t.tolist()}}}')\n",
"\n",
"ml_client.online_endpoints.invoke(\n",
" endpoint_name=endpoint_name,\n",
" deployment_name=deployment_name,\n",
" request_file=\"sample-request.json\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test Tensorflow/ONNX"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import numpy as np\n",
"import torch\n",
"\n",
"endpoint_name = \"...\"\n",
"deployment_name = \"...\"\n",
"\n",
"def numpify(t: torch.Tensor) -> np.ndarray:\n",
" \"\"\"Tensorflow and ONNX models expect NHWC format, but PyTorch uses NCHW.\n",
" This function converts a PyTorch tensor to Numpy, and transposes the axes\"\"\"\n",
" a = t.numpy()\n",
" return np.transpose(a, (0, 2, 3, 1))\n",
"\n",
"\n",
"t = torch.randn(1, 3, 224, 224) # Replace with your model input shape\n",
"t = numpify(t)\n",
"with open(\"sample-request.json\", \"w\") as f:\n",
" json.dump({\"data\": t.tolist()}, f)\n",
"\n",
"ml_client.online_endpoints.invoke(\n",
" endpoint_name=endpoint_name,\n",
" deployment_name=deployment_name,\n",
" request_file=\"sample-request.json\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "azuredeploy311",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}