diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..61f2dc9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +**/__pycache__/ diff --git a/mnist-service/app.py b/mnist-service/app.py deleted file mode 100644 index 0bda93c..0000000 --- a/mnist-service/app.py +++ /dev/null @@ -1,25 +0,0 @@ -import gradio as gr -import tensorflow as tf -import numpy as np -from PIL import Image - -def predict(img_arr): - # Preprocess the image before passing it to the model - img_arr = tf.expand_dims(img_arr, 0) - img_arr = img_arr[:, :, :, 0] # Keep only the first channel (grayscale) - - # Load the trained model - loaded_model = tf.keras.models.load_model('mnist_model.h5') - - # Make predictions - predictions = loaded_model.predict(img_arr) - predicted_label = tf.argmax(predictions[0]).numpy() - - return str(predicted_label) - -# Setup the gradio interface -gr.Interface(fn=predict, - inputs="image", - outputs="label", - examples=[["0.jpg"], ["1.jpg"]] -).launch(server_name="0.0.0.0", server_port=8080) \ No newline at end of file diff --git a/mnist-service/gen_example_images.py b/mnist-service/gen_example_images.py deleted file mode 100644 index a2d054c..0000000 --- a/mnist-service/gen_example_images.py +++ /dev/null @@ -1,17 +0,0 @@ -import numpy as np -from PIL import Image -from tensorflow.keras.datasets import mnist - -# Load MNIST dataset - -(_, _), (x_test, y_test) = mnist.load_data() - -# Find the indices of images with label 0 and 1 - -zero_indices = np.where(y_test == 0)[0] -one_indices = np.where(y_test == 1)[0] - -# Convert the images to PIL format and save as 0.jpg and 1.jpg - -Image.fromarray(x_test[zero_indices[0]]).save("0.jpg") -Image.fromarray(x_test[one_indices[0]]).save("1.jpg") \ No newline at end of file diff --git a/mnist-service/mnist_model.h5 b/mnist-service/mnist_model.h5 deleted file mode 100644 index 302f9af..0000000 Binary files a/mnist-service/mnist_model.h5 and /dev/null differ diff --git a/mnist-service/python_sdk_mnist.ipynb b/mnist-service/python_sdk_mnist.ipynb deleted file mode 100644 index 319b289..0000000 --- a/mnist-service/python_sdk_mnist.ipynb +++ /dev/null @@ -1,530 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "QOAnoPl-dlSY", - "tags": [] - }, - "source": [ - "# Deploy Machine Learning Service on Truefoundry\n", - "This notebook demonstrates a demo on how you can deploy an image classification model trained on mnist dataset as a Gradio App on truefoundry platform." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Prerequisites\n", - "\n", - "Before we begin, make sure you have the following prerequisites in place:\n", - "\n", - "1. **Install `servicefoundry`** (Note: `servicefoundry` is pre-installed in Truefoundry notebooks). You can install it using the following command:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -U \"servicefoundry\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "2. **Login to servicefoundry**\n", - "\n", - "Enter your host in the `--host` argument, eg: \"https://your-domain.truefoundry.com\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!sfy login --host \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2c6nhZIxSvl2" - }, - "source": [ - "3. **Select the `Workspace`** in which you want to deploy your application.
Once you run the cell below, you will be prompted to enter your Workspace FQN.
\n", - " * **Step 1:** Navigate to the **Workspace** tab on the left panel of your User Interface.\n", - " * **Step 2:** Identify the Workspace you want to deploy the application in.\n", - " * **Step 3:** Copy the Workspace FQN
\n", - " ![Copying Workspace FQN](https://files.readme.io/730fee2-Screenshot_2023-02-28_at_2.08.34_PM.png)\n", - " * **Step 4:** Paste the Workspace FQN in the prompt and press enter." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "elr9RXA4En1G" - }, - "outputs": [], - "source": [ - "workspace_fqn = input(\"Enter your Workspace FQN: \")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "4. **Setup Logging**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Clone the Getting Started Repo\n", - "\n", - "In this step, we will clone the Truefoundry Getting Started repository. This repository contains the service code that we are going to deploy." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!git clone https://github.com/truefoundry/getting-started-examples.git" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's `cd` into the directory containing our inference code, i.e `getting-started-examples/mnist-service`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%cd getting-started-examples/mnist-service" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "T9K_qE3ol5De", - "tags": [] - }, - "source": [ - "### Code Structure\n", - "Before we proceed, let's take a quick look at the structure of the code you'll be deploying:\n", - "\n", - "```\n", - ".\n", - "|_ app.py: Contains the Gradio Service code used to serve your model.\n", - "|_ requirements.txt: Dependency file.\n", - "|_ gen_example_images.py: Code to generate example images that you can use to test your Gradio service.\n", - "|_ train.py: Contains the Training code used to train the model we are deploying.\n", - "```\n", - "\n", - "The `app.py` file houses the code that enables you to deploy and interact with your trained model using Gradio. 
Here's a brief overview of its components:\n", - "\n", - "* **Prediction Function:**\n", - " * The predict function is responsible for making predictions using the trained model.\n", - " * Preprocess the input image and prepare it for model inference.\n", - " * Load the trained model and perform predictions to identify the digit in the image.\n", - "* **Gradio Interface Setup:**\n", - " * Create a Gradio interface that takes an input image and produces a predicted label.\n", - " * Provide examples of images (\"0.jpg\" and \"1.jpg\") for users to easily test the interface.\n", - "\n", - "```python\n", - "import gradio as gr \n", - "import tensorflow as tf \n", - "import numpy as np \n", - "from PIL import Image\n", - "\n", - "def predict(img_arr):\n", - "\t# Preprocess the image before passing it to the model\n", - " img_arr = tf.expand_dims(img_arr, 0) \n", - " img_arr = img_arr[:, :, :, 0] # Keep only the first channel (grayscale)\n", - "\n", - "\t# Load the trained model\n", - " loaded_model = tf.keras.models.load_model('mnist_model.h5')\n", - "\n", - "\t# Make predictions\n", - " predictions = loaded_model.predict(img_arr) \n", - " predicted_label = tf.argmax(predictions[0]).numpy()\n", - "\n", - " return str(predicted_label)\n", - "\n", - "# Setup the gradio interface\n", - "gr.Interface(fn=predict, \n", - " inputs=\"image\", \n", - " outputs=\"label\", \n", - " examples=[[\"0.jpg\"], [\"1.jpg\"]] \n", - ").launch(server_name=\"0.0.0.0\", server_port=8080)\n", - "```\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TR7Ped05Crli" - }, - "source": [ - "## Deploying Your Machine Learning Service\n", - "\n", - "Now, let's move on to the deployment steps." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 1: Set Up Deployment Configuration\n", - "In this step, you will define your deployment configuration using the Truefoundry Python SDK. We will provide explanations for each parameter and guide you through the process." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Name\n", - "In the provided Python script, set a unique identifier for your service using the name field." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "name = \"mnist-service\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Image\n", - "\n", - "* Choosing the Right Approach for specifying image:\n", - " Depending on your scenario, you can choose to deploy either a pre-built Docker image or build a Docker image from your source code.\n", - " \n", - "* Using Pre-Built Images\n", - " If you already have a Docker image that you've previously built and pushed to a container registry, you can use the `Image` class.\n", - " The `Image class` would simply reference the pre-built image URL and use it for deployment.\n", - "* Using Build for Source Code\n", - " In cases where you don't have a pre-built image, you'll use the `Build` option to create an image from your source code.\n", - " This scenario applies when you want to package and deploy your application from scratch.\n", - " * Creating DockerFile with PythonBuild\n", - " If you don't have a Dockerfile but your application is written in Python, you can use the `PythonBuild` class.\n", - " The `PythonBuild` class will inspect your Python code and create a Dockerfile automatically based on the code's requirements.\n", - " * Choosing DockerBuild for Dockerfile\n", - " If you have a pre-existing Dockerfile, you can use the `DockerBuild` class.\n", - " This allows you to directly reference the Dockerfile present in your code repository.\n", - "\n", - "In this case given we did not have a prebuilt image, and no dockerfile in our source code we are using PythonBuild, which takes our code configuration from us and templatizes a Dockerfile for us." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from servicefoundry import Build, PythonBuild, LocalSource\n", - "\n", - "image = Build(\n", - " build_spec=PythonBuild(\n", - " command=\"python app.py\",\n", - " requirements_path=\"requirements.txt\"\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Ports\n", - "* Specify the **Port** for routing customer traffic to your deployed application using the Port option.\n", - "* Specify the **Host** to define how the external world will access your application.\n", - "\n", - "\n", - "> 📘 Picking a value for `host`\n", - "> \n", - "> Providing a host value depends on the base domain urls configured in the cluster settings, you can learn how to find the base domain urls available to you [here](doc:checking-configured-domain)\n", - "> \n", - "> For e.g. If your base domain url is `*.truefoundry.your-org.com` then a valid value can be `fastapi-your-workspace-8000.truefoundry.your-org.com`. \n", - "> \n", - "> Alternatively if you have a non wildcard based domain url e.g. `truefoundry.your-org.com`, then a valid value can be `truefoundry.your-org.com/fastapi-your-workspace-8000`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from servicefoundry import Port\n", - "\n", - "ports = [\n", - " Port(\n", - " port=8080,\n", - " host=\"\"\n", - " )\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Resources\n", - "Allocate computing resources (CPU, memory, storage) for your service using the Resources option.
\n", - "* **CPU** refers to the computing power available to your application\n", - "* **Memory** refers to how much space your application has to hold and work with data while it's running\n", - "* **Ephemeral storage** is where your application can temporarily store files and data\n", - "\n", - "Requests and Limits:\n", - "\n", - "* **Request** is like asking for a certain amount of a resource. It's what your application initially asks for to start working properly.\n", - "* **Limit** is like setting a maximum value. It restricts how much of a resource (like CPU or memory) your application can use.\n", - "\n", - "So for each category of resource you specify the Request and Limits" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from servicefoundry import Resources\n", - "\n", - "resources = Resources(\n", - " memory_limit=500,\n", - " memory_request=500,\n", - " ephemeral_storage_limit=600,\n", - " ephemeral_storage_request=600,\n", - " cpu_limit=0.3,\n", - " cpu_request=0.3\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2: Bring all of the configuration together via the Service Class and Deploy\n", - "\n", - "To deploy your machine learning service, you need to create an instance of the `Service` class provided by the servicefoundry library. This instance will encapsulate all the necessary configurations and parameters for deploying and managing your service." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from servicefoundry import Service\n", - "\n", - "service = Service(\n", - " name=name,\n", - " image=image,\n", - " ports=ports,\n", - " resources=resources,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "After configuring your deployment settings, you can deploy the service using the deploy method. Here we are replacing the WORKSPACE_FQN with the workspace_fqn we stored earlier." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Deploy the service\n", - "service.deploy(workspace_fqn=workspace_fqn)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once the build is complete, you will see a link to the dashboard after a message like `You can find the application on the dashboard:-`.
Click on the link to access the deployment dashboard." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Interacting with the Application\n", - "\n", - "Clicking the link will open up the dashboard dedicated to your service, where you'll have access to various details.\n", - "\n", - "Here you will be able to see the Endpoint of your service at the top right corner. You can click on the Endpoint to open your application.\n", - "\n", - "![](https://files.readme.io/142331b-8e96f01-Screenshot_2023-06-30_at_1.54.29_PM.png)\n", - "\n", - "Now you can click on one of the Images from the two options and see what predictions your model gives:\n", - "\n", - "![](https://files.readme.io/d2f8d05-bba9cc1-Screenshot_2023-06-30_at_1.57.15_PM.png)\n", - "\n", - "Congratulations! You have successfully deployed the model using Truefoundry." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Enabling Autoscaling for your Service\n", - "\n", - "In this section, we'll explore enabling autoscaling for your service, a feature that allows your application to dynamically adjust its resources based on real-time demand and predefined metrics. Autoscaling optimizes performance, responsiveness, and resource efficiency.\n", - "\n", - "## Scaling with Replicas and Pods\n", - "\n", - "In Kubernetes and containerized environments, replicas and pods are essential concepts for managing application availability and scalability.\n", - "\n", - "When deploying applications in a Kubernetes cluster, you specify the number of replicas for your service. Each replica is an identical instance of your application within a pod. A pod is the smallest deployable unit in Kubernetes, comprising one or more closely connected containers that share network and storage.\n", - "\n", - "By setting the replica count, you control how many pod instances run concurrently, directly affecting your application's traffic handling capacity.\n", - "\n", - "### Handling Demand\n", - "\n", - "More replicas allocate more pods to manage incoming traffic, distributing the workload and improving responsiveness during spikes. Scaling by adjusting replicas aligns your application's capacity with varying traffic.\n", - "\n", - "### Setting the number of replicas\n", - "\n", - "To configure the number of replicas your service should have via the PythonSDK, you can add the following to your service deployment configuration:\n", - "\n", - "```python\n", - "...\n", - "service = Service(\n", - " ...\n", - " replicas=1,\n", - " ...\n", - ")\n", - "...\n", - "```\n", - "\n", - "Next, we'll explore how autoscaling improves performance by dynamically adjusting replicas based on real-time metrics and demand.\n", - "\n", - "## Autoscaling Overview\n", - "\n", - "Autoscaling involves dynamically adjusting computing resources based on real-time demand and predefined metrics. This optimization ensures that your service efficiently utilizes resources while responding to varying traffic loads.\n", - "\n", - "### Autoscaling Configuration\n", - "\n", - "Autoscaling configuration involves setting minimum and maximum replica counts as well as defining metrics that trigger autoscaling actions. 
Here are the available settings for autoscaling:\n", - "\n", - "- **Minimum Replicas:** The minimum number of replicas to keep available.\n", - "- **Maximum Replicas:** The maximum number of replicas to keep available.\n", - "- **Cooldown Period:** The period to wait after the last active trigger before scaling resources back to 0.\n", - "\n", - "### Configuring Autoscaling via UI\n", - "\n", - "To configure autoscaling parameters for your service via the PythonSDK, you can add the following to your service deployment configuration:\n", - "\n", - "```python\n", - "from servicefoundry import Service, \n", - "...\n", - "service = Service(\n", - " ...\n", - " replicas=Autoscaling(\n", - " min_replicas=1,\n", - " max_replicas=3,\n", - " cooldown_period=30,\n", - " ),\n", - " ...\n", - ")\n", - "...\n", - "```\n", - "\n", - "### Autoscaling Metrics\n", - "\n", - "Autoscaling metrics guide the system in dynamically adjusting resource allocation based on real-time conditions. They ensure your service can adapt to changing demands while maintaining optimal performance. We support the following three types of autoscaling metrics:\n", - "\n", - "1. **RPSMetric (Requests Per Second Metric):** Monitors the rate of incoming requests measured in requests per second. Suitable for applications with varying request loads over time.\n", - "\n", - "2. **CPUUtilizationMetric (CPU Utilization Metric):** Monitors the percentage of CPU resources in use. Ideal for applications where performance correlates with CPU usage.\n", - "\n", - "3. **TimeRange:** Allows scheduling autoscaling actions based on specific time periods. Useful for applications with predictable traffic patterns.\n", - "\n", - "# Additional Capabilities of Services\n", - "\n", - "Let's explore additional functionalities that Services provide, extending beyond deployment strategies:\n", - "\n", - "- **Rollouts Management:** Maintain precise control over how new versions of your application are released to users, ensuring seamless transitions and minimal disruption.\n", - "- **Endpoint Authentication:** Bolster the security of your endpoints by integrating authentication mechanisms, effectively limiting access and safeguarding sensitive data.\n", - "- **Health Check Monitoring:** Monitor your services' health through comprehensive health checks, guaranteeing their operational readiness to handle incoming requests.\n", - "- **Efficient Communication with gRPC:** Leverage the power of gRPC, a high-performance communication protocol, to establish efficient and reliable connections between microservices.\n", - "- **TensorFlow Serving with gRPC:** Harness the capabilities of TensorFlow Serving in conjunction with gRPC to facilitate machine learning model deployment and communication.\n", - "- **Intercept Management:** Implement interceptors to exert fine-grained control over network communication, enhancing security measures and facilitating robust logging.\n", - "- **Scaling Deep Dive:** Gain in-depth insights into the nuances of scaling your services, optimizing resource allocation strategies to seamlessly adapt to varying demands" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": ".conda-jupyter-base", - "language": "python", - "name": "conda-env-.conda-jupyter-base-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - 
"version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/mnist-service/requirements.txt b/mnist-service/requirements.txt deleted file mode 100644 index 9339ed2..0000000 --- a/mnist-service/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -gradio -scikit-learn==1.0.2 -joblib -pandas -tensorflow \ No newline at end of file diff --git a/mnist-service/servicefoundry_cli_mnist.ipynb b/mnist-service/servicefoundry_cli_mnist.ipynb deleted file mode 100644 index 72d5a3f..0000000 --- a/mnist-service/servicefoundry_cli_mnist.ipynb +++ /dev/null @@ -1,493 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "QOAnoPl-dlSY", - "tags": [] - }, - "source": [ - "# Deploy Machine Learning Service on Truefoundry\n", - "This notebook demonstrates a demo on how you can deploy an image classification model trained on mnist dataset as a Gradio App on truefoundry platform." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Prerequisites\n", - "\n", - "Before we begin, make sure you have the following prerequisites in place:\n", - "\n", - "1. **Install `servicefoundry`** (Note: `servicefoundry` is pre-installed in Truefoundry notebooks). You can install it using the following command:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -U \"servicefoundry\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "2. **Login to servicefoundry**\n", - "\n", - "Enter your host in the `--host` argument, eg: \"https://your-domain.truefoundry.com\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!sfy login --host \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2c6nhZIxSvl2" - }, - "source": [ - "3. **Select the `Workspace`** in which you want to deploy your application.
Once you run the cell below, you will be prompted to enter your Workspace FQN.
\n", - " * **Step 1:** Navigate to the **Workspace** tab on the left panel of your User Interface.\n", - " * **Step 2:** Identify the Workspace you want to deploy the application in.\n", - " * **Step 3:** Copy the Workspace FQN
\n", - " ![Copying Workspace FQN](https://files.readme.io/730fee2-Screenshot_2023-02-28_at_2.08.34_PM.png)\n", - " * **Step 4:** Paste the Workspace FQN in the prompt and press enter." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "elr9RXA4En1G" - }, - "outputs": [], - "source": [ - "workspace_fqn = input(\"Enter your Workspace FQN: \")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "4. **Setup Logging**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Clone the Getting Started Repo\n", - "\n", - "In this step, we will clone the Truefoundry Getting Started repository. This repository contains the service code that we are going to deploy." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!git clone https://github.com/truefoundry/getting-started-examples.git" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's `cd` into the directory containing our inference code, i.e `getting-started-examples/mnist-service`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%cd getting-started-examples/mnist-service" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "T9K_qE3ol5De", - "tags": [] - }, - "source": [ - "### Code Structure\n", - "Before we proceed, let's take a quick look at the structure of the code you'll be deploying:\n", - "\n", - "```\n", - ".\n", - "|_ app.py: Contains the Gradio Service code used to serve your model.\n", - "|_ requirements.txt: Dependency file.\n", - "|_ gen_example_images.py: Code to generate example images that you can use to test your Gradio service.\n", - "|_ train.py: Contains the Training code used to train the model we are deploying.\n", - "```\n", - "\n", - "The `app.py` file houses the code that enables you to deploy and interact with your trained model using Gradio. 
Here's a brief overview of its components:\n", - "\n", - "* **Prediction Function:**\n", - " * The predict function is responsible for making predictions using the trained model.\n", - " * Preprocess the input image and prepare it for model inference.\n", - " * Load the trained model and perform predictions to identify the digit in the image.\n", - "* **Gradio Interface Setup:**\n", - " * Create a Gradio interface that takes an input image and produces a predicted label.\n", - " * Provide examples of images (\"0.jpg\" and \"1.jpg\") for users to easily test the interface.\n", - "\n", - "```python\n", - "import gradio as gr \n", - "import tensorflow as tf \n", - "import numpy as np \n", - "from PIL import Image\n", - "\n", - "def predict(img_arr):\n", - "\t# Preprocess the image before passing it to the model\n", - " img_arr = tf.expand_dims(img_arr, 0) \n", - " img_arr = img_arr[:, :, :, 0] # Keep only the first channel (grayscale)\n", - "\n", - "\t# Load the trained model\n", - " loaded_model = tf.keras.models.load_model('mnist_model.h5')\n", - "\n", - "\t# Make predictions\n", - " predictions = loaded_model.predict(img_arr) \n", - " predicted_label = tf.argmax(predictions[0]).numpy()\n", - "\n", - " return str(predicted_label)\n", - "\n", - "# Setup the gradio interface\n", - "gr.Interface(fn=predict, \n", - " inputs=\"image\", \n", - " outputs=\"label\", \n", - " examples=[[\"0.jpg\"], [\"1.jpg\"]] \n", - ").launch(server_name=\"0.0.0.0\", server_port=8080)\n", - "```\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TR7Ped05Crli" - }, - "source": [ - "## Deploying Your Machine Learning Service\n", - "\n", - "Now, let's move on to the deployment steps." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 1: Set Up Deployment Configuration\n", - "In this step, you will define your deployment configuration using the ServiceFoundry CLI. We will provide explanations for each configuration, and afterwards we will bring all of this together into a `servicefoundry.yaml` file.
\n", - "\n", - "For deploying via ServiceFoundry CLI you need to create a `servicefoundry.yaml` file with a YAML configuration of your Deployment\n", - "\n", - "#### Name\n", - "In the provided Python script, set a unique identifier for your service using the name field.\n", - "\n", - "```yaml\n", - "name: mnist-service\n", - "```\n", - "\n", - "#### Image\n", - "\n", - "* Choosing the Right Approach for specifying image:\n", - " Depending on your scenario, you can choose to deploy either a pre-built Docker image or build a Docker image from your source code.\n", - " \n", - "* Using Pre-Built Images\n", - " If you already have a Docker image that you've previously built and pushed to a container registry, you can use `type: image`.\n", - " The `type: image` would simply reference the pre-built image URL and use it for deployment.\n", - "* Using Build for Source Code\n", - " In cases where you don't have a pre-built image, you'll use the Build option to create an image from your source code.\n", - " This scenario applies when you want to package and deploy your application from scratch.\n", - " * Creating DockerFile with PythonBuild\n", - " If you don't have a Dockerfile but your application is written in Python, you can use the `type: tfy-python-buildpack`.\n", - " The PythonBuild class will inspect your Python code and create a Dockerfile automatically based on the code's requirements.\n", - " * Choosing DockerBuild for Dockerfile\n", - " If you have a pre-existing Dockerfile, you can use the `type: dockerfile`.\n", - " This allows you to directly reference the Dockerfile present in your code repository.\n", - "\n", - "In this case given we did not have a prebuilt image, and no dockerfile in our source code we are using `tfy-python-buildpack`, which takes our code configuration from us and templatizes a Dockerfile for us.\n", - "\n", - "```yaml\n", - "image:\n", - " type: build\n", - " build_spec:\n", - " type: tfy-python-buildpack\n", - " command: python app.py\n", - " python_version: '3.9'\n", - " requirements_path: requirements.txt\n", - " build_context_path: ./\n", - " build_source:\n", - " type: local\n", - "```\n", - "\n", - "#### Ports\n", - "* Specify the **Port** for routing customer traffic to your deployed application using the Port option.\n", - "* Specify the **Host** to define how the external world will access your application.\n", - "\n", - "\n", - "> 📘 Picking a value for `host`\n", - "> \n", - "> Providing a host value depends on the base domain urls configured in the cluster settings, you can learn how to find the base domain urls available to you [here](doc:checking-configured-domain)\n", - "> \n", - "> For e.g. If your base domain url is `*.truefoundry.your-org.com` then a valid value can be `fastapi-your-workspace-8000.truefoundry.your-org.com`. \n", - "> \n", - "> Alternatively if you have a non wildcard based domain url e.g. `truefoundry.your-org.com`, then a valid value can be `truefoundry.your-org.com/fastapi-your-workspace-8000`\n", - "\n", - "```yaml\n", - "ports:\n", - " - host: \n", - " port: 8080\n", - "```\n", - "\n", - "#### Resources\n", - "Allocate computing resources (CPU, memory, storage) for your service using the Resources option.
\n", - "* **CPU** refers to the computing power available to your application\n", - "* **Memory** refers to how much space your application has to hold and work with data while it's running\n", - "* **Ephemeral storage** is where your application can temporarily store files and data\n", - "\n", - "Requests and Limits:\n", - "\n", - "* **Request** is like asking for a certain amount of a resource. It's what your application initially asks for to start working properly.\n", - "* **Limit** is like setting a maximum value. It restricts how much of a resource (like CPU or memory) your application can use.\n", - "\n", - "So for each category of resource you specify the Request and Limits\n", - "\n", - "```yaml\n", - "resources:\n", - " cpu_limit: 0.3\n", - " gpu_count: 0\n", - " cpu_request: 0.3\n", - " memory_limit: 500\n", - " memory_request: 500\n", - " ephemeral_storage_limit: 600\n", - " ephemeral_storage_request: 600\n", - "```\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2: Bring all of the configuration together via the Service Class and Deploy\n", - "\n", - "To deploy your machine learning service, you need to create a `servicefoundry.yaml` file with the configuration library. This will encapsulate all the necessary configurations and parameters for deploying and managing your service." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile servicefoundry.yaml\n", - "name: mnist-service\n", - "type: service\n", - "image:\n", - " type: build\n", - " build_spec:\n", - " type: tfy-python-buildpack\n", - " command: python app.py\n", - " python_version: '3.9'\n", - " requirements_path: requirements.txt\n", - " build_context_path: ./\n", - " build_source:\n", - " type: local\n", - "ports:\n", - " - host: \n", - " port: 8080\n", - "replicas: 1\n", - "resources:\n", - " cpu_limit: 0.3\n", - " gpu_count: 0\n", - " cpu_request: 0.3\n", - " memory_limit: 500\n", - " memory_request: 500\n", - " ephemeral_storage_limit: 600\n", - " ephemeral_storage_request: 600" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "After configuring your deployment settings, you can deploy the service using the deploy method. Here we are replacing the WORKSPACE_FQN with the workspace_fqn we stored earlier." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Deploy the service\n", - "!servicefoundry deploy --workspace-fqn=YOUR_WORKSPACE_FQN" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once the build is complete, you will see a link to the dashboard after a message like `You can find the application on the dashboard:-`.
Click on the link to access the deployment dashboard." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Interacting with the Application\n", - "\n", - "Clicking the link will open up the dashboard dedicated to your service, where you'll have access to various details.\n", - "\n", - "Here you will be able to see the Endpoint of your service at the top right corner. You can click on the Endpoint to open your application.\n", - "\n", - "![](https://files.readme.io/142331b-8e96f01-Screenshot_2023-06-30_at_1.54.29_PM.png)\n", - "\n", - "Now you can click on one of the Images from the two options and see what predictions your model gives:\n", - "\n", - "![](https://files.readme.io/d2f8d05-bba9cc1-Screenshot_2023-06-30_at_1.57.15_PM.png)\n", - "\n", - "Congratulations! You have successfully deployed the model using Truefoundry." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Enabling Autoscaling for your Service\n", - "\n", - "In this section, we'll explore enabling autoscaling for your service, a feature that allows your application to dynamically adjust its resources based on real-time demand and predefined metrics. Autoscaling optimizes performance, responsiveness, and resource efficiency.\n", - "\n", - "## Scaling with Replicas and Pods\n", - "\n", - "In Kubernetes and containerized environments, replicas and pods are essential concepts for managing application availability and scalability.\n", - "\n", - "When deploying applications in a Kubernetes cluster, you specify the number of replicas for your service. Each replica is an identical instance of your application within a pod. A pod is the smallest deployable unit in Kubernetes, comprising one or more closely connected containers that share network and storage.\n", - "\n", - "By setting the replica count, you control how many pod instances run concurrently, directly affecting your application's traffic handling capacity.\n", - "\n", - "### Handling Demand\n", - "\n", - "More replicas allocate more pods to manage incoming traffic, distributing the workload and improving responsiveness during spikes. Scaling by adjusting replicas aligns your application's capacity with varying traffic.\n", - "\n", - "### Setting the number of replicas\n", - "\n", - "To configure the number of replicas your service should have via the ServiceFoundry CLI, you can add the following to your service deployment configuration:\n", - "\n", - "```yaml\n", - "...\n", - "replicas: 1\n", - "...\n", - "```\n", - "\n", - "Next, we'll explore how autoscaling improves performance by dynamically adjusting replicas based on real-time metrics and demand.\n", - "\n", - "## Autoscaling Overview\n", - "\n", - "Autoscaling involves dynamically adjusting computing resources based on real-time demand and predefined metrics. This optimization ensures that your service efficiently utilizes resources while responding to varying traffic loads.\n", - "\n", - "### Autoscaling Configuration\n", - "\n", - "Autoscaling configuration involves setting minimum and maximum replica counts as well as defining metrics that trigger autoscaling actions. 
Here are the available settings for autoscaling:\n", - "\n", - "- **Minimum Replicas:** The minimum number of replicas to keep available.\n", - "- **Maximum Replicas:** The maximum number of replicas to keep available.\n", - "- **Cooldown Period:** The period to wait after the last active trigger before scaling resources back to 0.\n", - "\n", - "### Configuring Autoscaling via UI\n", - "\n", - "To configure autoscaling parameters for your service via the ServiceFoundry CLI, you can add the following to your service deployment configuration:\n", - "\n", - "```yaml\n", - "replicas:\n", - " min_replicas: 1\n", - " max_replicas: 3\n", - " cooldown_period: 300\n", - "...\n", - "```\n", - "\n", - "### Autoscaling Metrics\n", - "\n", - "Autoscaling metrics guide the system in dynamically adjusting resource allocation based on real-time conditions. They ensure your service can adapt to changing demands while maintaining optimal performance. We support the following three types of autoscaling metrics:\n", - "\n", - "1. **RPSMetric (Requests Per Second Metric):** Monitors the rate of incoming requests measured in requests per second. Suitable for applications with varying request loads over time.\n", - "\n", - "2. **CPUUtilizationMetric (CPU Utilization Metric):** Monitors the percentage of CPU resources in use. Ideal for applications where performance correlates with CPU usage.\n", - "\n", - "3. **TimeRange:** Allows scheduling autoscaling actions based on specific time periods. Useful for applications with predictable traffic patterns.\n", - "\n", - "# Additional Capabilities of Services\n", - "\n", - "Let's explore additional functionalities that Services provide, extending beyond deployment strategies:\n", - "\n", - "- **Rollouts Management:** Maintain precise control over how new versions of your application are released to users, ensuring seamless transitions and minimal disruption.\n", - "- **Endpoint Authentication:** Bolster the security of your endpoints by integrating authentication mechanisms, effectively limiting access and safeguarding sensitive data.\n", - "- **Health Check Monitoring:** Monitor your services' health through comprehensive health checks, guaranteeing their operational readiness to handle incoming requests.\n", - "- **Efficient Communication with gRPC:** Leverage the power of gRPC, a high-performance communication protocol, to establish efficient and reliable connections between microservices.\n", - "- **TensorFlow Serving with gRPC:** Harness the capabilities of TensorFlow Serving in conjunction with gRPC to facilitate machine learning model deployment and communication.\n", - "- **Intercept Management:** Implement interceptors to exert fine-grained control over network communication, enhancing security measures and facilitating robust logging.\n", - "- **Scaling Deep Dive:** Gain in-depth insights into the nuances of scaling your services, optimizing resource allocation strategies to seamlessly adapt to varying demands" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": ".conda-jupyter-base", - "language": "python", - "name": "conda-env-.conda-jupyter-base-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - 
"pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/mnist-service/train.py b/mnist-service/train.py deleted file mode 100644 index c962d73..0000000 --- a/mnist-service/train.py +++ /dev/null @@ -1,38 +0,0 @@ -import tensorflow as tf -from tensorflow.keras.datasets import mnist - -# Load the MNIST dataset - -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -# Normalize the pixel values between 0 and 1 - -x_train = x_train / 255.0 -x_test = x_test / 255.0 - -# Define the model architecture - -model = tf.keras.Sequential([ - tf.keras.layers.Flatten(input_shape=(28, 28)), - tf.keras.layers.Dense(128, activation='relu'), - tf.keras.layers.Dense(10, activation='softmax') -]) - -# Compile the model - -model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) - -# Train the model - -epochs = 5 -model.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test)) - -# Evaluate the model - -loss, accuracy = model.evaluate(x_test, y_test) -print(f'Test loss: {loss}') -print(f'Test accuracy: {accuracy}') - -# Save the trained model - -model.save('mnist_model.h5') \ No newline at end of file