diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 21d938d0..0bc7070f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -5,7 +5,7 @@ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", "features": { - "ghcr.io/devcontainers/features/azure-cli:1": {}, + "ghcr.io/devcontainers/features/azure-cli:1": {} }, // Features to add to the dev container. More info: https://containers.dev/features. @@ -25,6 +25,7 @@ "GitHub.vscode-pull-request-github", "ms-python.python", "ms-python.black-formatter", + "stkb.rewrap" ] } } diff --git a/.env.template b/.env.template index b5056bf4..189d5485 100644 --- a/.env.template +++ b/.env.template @@ -1,9 +1,6 @@ NACHET_AZURE_STORAGE_CONNECTION_STRING= -NACHET_MODEL_ENDPOINT_REST_URL= -NACHET_MODEL_ENDPOINT_ACCESS_KEY= NACHET_DATA= -NACHET_SUBSCRIPTION_ID= -NACHET_RESOURCE_GROUP= -NACHET_WORKSPACE= -NACHET_MODEL= +NACHET_BLOB_PIPELINE_NAME= +NACHET_BLOB_PIPELINE_VERSION= +NACHET_BLOB_PIPELINE_DECRYPTION_KEY= NACHET_MAX_CONTENT_LENGTH= diff --git a/.github/workflows/build-push-docker.yml b/.github/workflows/build-push-docker.yml deleted file mode 100644 index cb994137..00000000 --- a/.github/workflows/build-push-docker.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Docker build and push to ghcr.io/ai-cfia and lint test - -on: - pull_request: - types: - - opened - - closed - - synchronize - -jobs: - lint-test: - uses: ai-cfia/github-workflows/.github/workflows/workflow-lint-test-python.yml@main - secrets: inherit - - deploy: - uses: ai-cfia/github-workflows/.github/workflows/workflow-build-push-container-github-registry.yml@63-as-a-devops-i-would-like-to-create-a-workflow-to-push-images-to-this-organisation-docker-registry - with: - container-name: ${{ github.event.repository.name }} - tag: ${{ github.sha }} - registry: ghcr.io/ai-cfia - secrets: inherit - diff --git a/.github/workflows/workflows.yml b/.github/workflows/workflows.yml new file mode 100644 index 00000000..03442dbc --- /dev/null +++ b/.github/workflows/workflows.yml @@ -0,0 +1,36 @@ +name: Docker build and push to ghcr.io/ai-cfia and lint test + +on: + pull_request: + types: + - opened + - closed + - synchronize + +jobs: + lint-test: + uses: + ai-cfia/github-workflows/.github/workflows/workflow-lint-test-python.yml@main + secrets: inherit + + deploy: + uses: + ai-cfia/github-workflows/.github/workflows/workflow-build-push-container-github-registry.yml@main + with: + container-name: ${{ github.event.repository.name }} + tag: ${{ github.sha }} + registry: ghcr.io/ai-cfia + secrets: inherit + + repo-standard: + uses: + ai-cfia/github-workflows/.github/workflows/workflow-repo-standards-validation.yml@main + secrets: inherit + + markdown-check: + uses: + ai-cfia/github-workflows/.github/workflows/workflow-markdown-check.yml@main + + yaml-check: + uses: + ai-cfia/github-workflows/.github/workflows/workflow-yaml-check.yml@main diff --git a/.gitignore b/.gitignore index f7834052..03010a78 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ .vscode/settings.json *.pyc .vscode/settings.json -output.* \ No newline at end of file +output.* diff --git a/README.md b/README.md index 9f2698ba..47b5a275 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ frontend-->>-Client: display inference res. 
backend->>+AzureStorageAPI: (async) upload_inference_result(json) ``` -**Details** +### Details - The backend was built with the [Quart](http://pgjones.gitlab.io/quart/) framework - Quart is an asyncio reimplementation of Flask @@ -41,35 +41,61 @@ backend->>+AzureStorageAPI: (async) upload_inference_result(json) **** ### RUNNING NACHET-BACKEND FROM DEVCONTAINER -When you are developping, you can run the program while in the devcontainer by using this command: -``` + +When you are developping, you can run the program while in the devcontainer by +using this command: + +```bash hypercorn -b :8080 app:app ``` ### RUNNING NACHET-BACKEND AS A DOCKER CONTAINER + If you want to run the program as a Docker container (e.g., for production), use: -``` + +```bash docker build -t nachet-backend . docker run -p 8080:8080 -v $(pwd):/app nachet-backend ``` ### TESTING NACHET-BACKEND + To test the program, use this command: -``` + +```bash python -m unittest discover -s tests ``` **** + ### ENVIRONMENT VARIABLES -Start by making a copy of `.env.template` and renaming it `.env`. For the backend to function, you will need to add the missing values: -* **NACHET_AZURE_STORAGE_CONNECTION_STRING**: Connection string to access external storage (Azure Blob Storage). -* **NACHET_MODEL_ENDPOINT_REST_URL**: Endpoint to communicate with deployed model for inferencing. -* **NACHET_MODEL_ENDPOINT_ACCESS_KEY**: Key used when consuming online endpoint. -* **NACHET_DATA**: Url to access nachet-data repository -* **NACHET_HEALTH_MESSAGE**: Health check message for the server. -* **NACHET_MAX_CONTENT_LENGTH**: Set the maximum size of the file that can be uploaded to the backend. Needs to be the same size as the `client_max_body_size` [value](https://github.com/ai-cfia/howard/blob/dedee069f051ba743122084fcb5d5c97c2499359/kubernetes/aks/apps/nachet/base/nachet-ingress.yaml#L13) set from the deployment in Howard. +Start by making a copy of `.env.template` and renaming it `.env`. For the +backend to function, you will need to add the missing values: + +- **NACHET_AZURE_STORAGE_CONNECTION_STRING**: Connection string to access + external storage (Azure Blob Storage). +- **NACHET_MODEL_ENDPOINT_REST_URL**: Endpoint to communicate with deployed + model for inferencing. +- **NACHET_MODEL_ENDPOINT_ACCESS_KEY**: Key used when consuming online endpoint. +- **NACHET_DATA**: Url to access nachet-data repository +- **NACHET_HEALTH_MESSAGE**: Health check message for the server. +- **NACHET_MAX_CONTENT_LENGTH**: Set the maximum size of the file that can be + uploaded to the backend. Needs to be the same size as the + `client_max_body_size` + [value](https://github.com/ai-cfia/howard/blob/dedee069f051ba743122084fcb5d5c97c2499359/kubernetes/aks/apps/nachet/base/nachet-ingress.yaml#L13) + set from the deployment in Howard. +- **NACHET_SUBSCRIPTION_ID** +- **NACHET_RESOURCE_GROUP** +- **NACHET_WORKSPACE** +- **NACHET_MODEL** +- **NACHET_BLOB_PIPELINE_NAME** +- **NACHET_BLOB_PIPELINE_VERSION** +- **NACHET_BLOB_PIPELINE_DECRYPTION_KEY** **** + ### DEPLOYING NACHET -If you need help deploying Nachet for your own needs, please contact us at cfia.ai-ia.acia@inspection.gc.ca. + +If you need help deploying Nachet for your own needs, please contact us at +. 
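As a quick sanity check for the environment variables listed in the README section above, a minimal standalone sketch (not part of this diff; the variable names come from `.env.template`, and the script itself is illustrative only) could look like this:

```python
# Minimal sketch: verify the Nachet environment variables before starting the server.
# Assumes python-dotenv is installed and a .env file sits next to this script.
import os

from dotenv import load_dotenv

REQUIRED_VARS = [
    "NACHET_AZURE_STORAGE_CONNECTION_STRING",
    "NACHET_DATA",
    "NACHET_BLOB_PIPELINE_NAME",
    "NACHET_BLOB_PIPELINE_VERSION",
    "NACHET_BLOB_PIPELINE_DECRYPTION_KEY",
    "NACHET_MAX_CONTENT_LENGTH",
]

load_dotenv()
missing = [name for name in REQUIRED_VARS if not os.getenv(name)]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")
print("All required Nachet environment variables are set.")
```

The backend performs a similar presence and format check in `before_serving()` (see the `app.py` changes below), raising `ServerError` when a variable is missing or malformed.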
diff --git a/TESTING.md b/TESTING.md new file mode 100644 index 00000000..5e1d5ede --- /dev/null +++ b/TESTING.md @@ -0,0 +1,103 @@ +# Testing documentation + +To start the automatic test, you can use the following command: + +```bash +python -m unittest discover -s tests +``` + +You also have the option to run automatic test in run_test.py or manually test +the functionality with the frontend. [See frontend testing +documentation](https://github.com/ai-cfia/nachet-frontend/blob/main/TESTING.md) + +To perform the following test, you will need the frontend repository for the +Nachet Interactive's application. The frontend can be found at: [Nachet Frontend +GitHub Repository](https://github.com/ai-cfia/nachet-frontend). + +You will also need the list of the available pipelines. The list can be found +[here](https://github.com/ai-cfia/nachet-backend/blob/51-implementing-2-models/docs/nachet-inference-documentation.md#available-version-of-the-json-file). + +--- + +## Test Case: Populate model selection with pipelines information + +**Objective**: Verify that the model selection component gets populated with the +pipeline information. + +**Preconditions:** + +- [ ] Nachet backend is set up and running. Use the command `hypercorn -b :8080 + app:app` to start the quartz server. +- [ ] The environment variables are all set. +- [ ] :exclamation: The frontend is not running yet + +**Test Steps:** + +1. Start the frontend application +1. Click on the model selection button +1. Validate that the current pipeline is selectable. + +**Expected Results:** + +- [ ] If a problem occurs while retrieving the data, an error should prevent the + server from starting. +- [ ] If a problem occurs while retrieving the data, but no error was raised, + the model selection component should be empty. +- [ ] If everything went correctly while retrieving the data, the model + selection component should display the pipeline metadata. + +**Actual Results:** + +- [ ] Describe the actual outcome of the test + +**Pass/Fail Criteria:** + +- [ ] Pass if the metadata from the available pipeline is displayed. +- [ ] Fail if the metadata from the available pipeline is not displayed and no + error was raised. + +--- + +## Test Case: Inference Request + +**Objective**: Verify that the inference request endpoint `/inf` behaves as +expected. + +**Preconditions:** + +- [ ] Nachet backend is set up and running. Use the command `hypercorn -b :8080 + app:app` to start the quartz server. +- [ ] The environment variables are all set. +- [ ] The frontend is running. +- [ ] Start the frontend application +- [ ] Click on the model selection button +- [ ] Validate that the current pipeline is selectable + +**Test Steps:** + +1. Upload a seed image +1. Select the first model (pipeline) +1. Click on the classify button +1. Wait until the results populated on the canvas +1. Repeat the process for every model (pipeline) + +|:boom: Warning| |:--:| |Displaying results from two different models will +overlap and become unreadable.| + +**Expected Results:** + +- [ ] The data populates both the canvas and the results components with the + prediction data from the model (pipeline). +- [ ] An alert with an error from port 3000 or 8080 is displayed. + +**Actual Results:** + +- [ ] Describe the actual outcome of the test + +**Pass/Fail Criteria:** + +- [ ] Pass if the data populates both the canvas and the results components with + the prediction of the model (pipeline). +- [ ] Fail if an alert is displayed with an error message. 
+- [ ] Fail if the data does not populates the canvas and the results component +- [ ] Fail if the inference is stuck in an infinite loop diff --git a/app.py b/app.py index 619ca213..2774841f 100644 --- a/app.py +++ b/app.py @@ -3,58 +3,68 @@ import os import base64 import re +import time +import model.inference as inference +from model import request_function + +from datetime import date from dotenv import load_dotenv from quart import Quart, request, jsonify from quart_cors import cors +from collections import namedtuple +from cryptography.fernet import Fernet import azure_storage.azure_storage_api as azure_storage_api -import model.inference as inference + from custom_exceptions import ( DeleteDirectoryRequestError, ListDirectoriesRequestError, InferenceRequestError, CreateDirectoryRequestError, ServerError, + PipelineNotFoundError, + ConnectionStringError ) load_dotenv() connection_string_regex = r"^DefaultEndpointsProtocol=https?;.*;FileEndpoint=https://[a-zA-Z0-9]+\.file\.core\.windows\.net/;$" connection_string = os.getenv("NACHET_AZURE_STORAGE_CONNECTION_STRING") -endpoint_url_regex = r"^https://.*\/score$" -endpoint_url = os.getenv("NACHET_MODEL_ENDPOINT_REST_URL") -endpoint_api_key = os.getenv("NACHET_MODEL_ENDPOINT_ACCESS_KEY") +FERNET_KEY = os.getenv("NACHET_BLOB_PIPELINE_DECRYPTION_KEY") +PIPELINE_VERSION = os.getenv("NACHET_BLOB_PIPELINE_VERSION") +PIPELINE_BLOB_NAME = os.getenv("NACHET_BLOB_PIPELINE_NAME") NACHET_DATA = os.getenv("NACHET_DATA") NACHET_MODEL = os.getenv("NACHET_MODEL") +try: + MAX_CONTENT_LENGTH = int(os.getenv("NACHET_MAX_CONTENT_LENGTH")) +except (TypeError, ValueError): + MAX_CONTENT_LENGTH = 16 + + +Model = namedtuple( + 'Model', + [ + 'request_function', + 'name', + 'endpoint', + 'api_key', + 'content_type', + 'deployment_platform', + ] +) + CACHE = { - 'seeds': None, - 'endpoints': None + "seeds": None, + "endpoints": None, + "pipelines": {}, } -# Check: do environment variables exist? -if connection_string is None: - raise ServerError("Missing environment variable: NACHET_AZURE_STORAGE_CONNECTION_STRING") - -if endpoint_url is None: - raise ServerError("Missing environment variable: NACHET_MODEL_ENDPOINT_REST_URL") - -if endpoint_api_key is None: - raise ServerError("Missing environment variables: NACHET_MODEL_ENDPOINT_ACCESS_KEY") - -# Check: are environment variables correct? 
-if not bool(re.match(connection_string_regex, connection_string)): - raise ServerError("Incorrect environment variable: NACHET_AZURE_STORAGE_CONNECTION_STRING") - -if not bool(re.match(endpoint_url_regex, endpoint_url)): - raise ServerError("Incorrect environment variable: NACHET_MODEL_ENDPOINT_ACCESS_KEY") - app = Quart(__name__) app = cors(app, allow_origin="*", allow_methods=["GET", "POST", "OPTIONS"]) -mb = int(os.getenv("NACHET_MAX_CONTENT_LENGTH")) +app.config["MAX_CONTENT_LENGTH"] = MAX_CONTENT_LENGTH * 1024 * 1024 -app.config["MAX_CONTENT_LENGTH"] = mb * 1024 * 1024 @app.post("/del") async def delete_directory(): @@ -63,12 +73,11 @@ async def delete_directory(): """ try: data = await request.get_json() - connection_string: str = os.environ["NACHET_AZURE_STORAGE_CONNECTION_STRING"] container_name = data["container_name"] folder_name = data["folder_name"] if container_name and folder_name: container_client = await azure_storage_api.mount_container( - connection_string, container_name, create_container=False + app.config["BLOB_CLIENT"], container_name, create_container=False ) if container_client: folder_uuid = await azure_storage_api.get_folder_uuid( @@ -99,11 +108,10 @@ async def list_directories(): """ try: data = await request.get_json() - connection_string: str = os.environ["NACHET_AZURE_STORAGE_CONNECTION_STRING"] container_name = data["container_name"] if container_name: container_client = await azure_storage_api.mount_container( - connection_string, container_name, create_container=True + app.config["BLOB_CLIENT"], container_name, create_container=True ) response = await azure_storage_api.get_directories(container_client) return jsonify(response), 200 @@ -122,12 +130,11 @@ async def create_directory(): """ try: data = await request.get_json() - connection_string: str = os.environ["NACHET_AZURE_STORAGE_CONNECTION_STRING"] container_name = data["container_name"] folder_name = data["folder_name"] if container_name and folder_name: container_client = await azure_storage_api.mount_container( - connection_string, container_name, create_container=False + app.config["BLOB_CLIENT"], container_name, create_container=False ) response = await azure_storage_api.create_folder( container_client, folder_name @@ -147,75 +154,90 @@ async def create_directory(): @app.post("/inf") async def inference_request(): """ - performs inference on an image, and returns the results. - The image and inference results uploaded to a folder in the user's container. + Performs inference on an image, and returns the results. + The image and inference results are uploaded to a folder in the user's container. 
""" + + seconds = time.perf_counter() # TODO: transform into logging try: + print(f"{date.today()} Entering inference request") # TODO: Transform into logging data = await request.get_json() - connection_string: str = os.environ["NACHET_AZURE_STORAGE_CONNECTION_STRING"] + pipeline_name = data.get("model_name") folder_name = data["folder_name"] container_name = data["container_name"] imageDims = data["imageDims"] image_base64 = data["image"] - if folder_name and container_name and imageDims and image_base64: - header, encoded_data = image_base64.split(",", 1) - image_bytes = base64.b64decode(encoded_data) - container_client = await azure_storage_api.mount_container( - connection_string, container_name, create_container=True - ) - hash_value = await azure_storage_api.generate_hash(image_bytes) - blob_name = await azure_storage_api.upload_image( - container_client, folder_name, image_bytes, hash_value - ) - blob = await azure_storage_api.get_blob(container_client, blob_name) - image_bytes = base64.b64encode(blob).decode("utf8") - data = { - "input_data": { - "columns": ["image"], - "index": [0], - "data": [image_bytes], - } - } - # encode the data as json to be sent to the model endpoint - body = str.encode(json.dumps(data)) - endpoint_url = os.getenv("NACHET_MODEL_ENDPOINT_REST_URL") - endpoint_api_key = os.getenv("NACHET_MODEL_ENDPOINT_ACCESS_KEY") - headers = { - "Content-Type": "application/json", - "Authorization": ("Bearer " + endpoint_api_key), - } - # send the request to the model endpoint - req = urllib.request.Request(endpoint_url, body, headers) - try: - # get the response from the model endpoint - response = urllib.request.urlopen(req) - result = response.read() - result_json = json.loads(result.decode("utf-8")) - # process the inference results - processed_result_json = await inference.process_inference_results( - result_json, imageDims - ) - # upload the inference results to the user's container as async task - result_json_string = json.dumps(processed_result_json) - app.add_background_task( - azure_storage_api.upload_inference_result, - container_client, - folder_name, - result_json_string, - hash_value, - ) - # return the inference results to the client - return jsonify(processed_result_json), 200 - except urllib.error.HTTPError as error: - print(error) - return jsonify(["endpoint cannot be reached" + str(error.code)]), 400 - else: - return jsonify(["missing request arguments"]), 400 + area_ratio = data.get("area_ratio", 0.5) + color_format = data.get("color_format", "hex") + + print(f"Requested by user: {container_name}") # TODO: Transform into logging + pipelines_endpoints = CACHE.get("pipelines") + blob_service_client = app.config.get("BLOB_CLIENT") + + if not (folder_name and container_name and imageDims and image_base64): + raise InferenceRequestError( + "missing request arguments: either folder_name, container_name, imageDims or image is missing") + + if not pipelines_endpoints.get(pipeline_name): + raise InferenceRequestError(f"model {pipeline_name} not found") + + header, encoded_data = image_base64.split(",", 1) + + # Validate image header #TODO with magic header + if not header.startswith("data:image/"): + raise InferenceRequestError("invalid image header") + + # Keep track of every output given by the models + # TODO: add it to CACHE variable + cache_json_result = [encoded_data] + image_bytes = base64.b64decode(encoded_data) + + container_client = await azure_storage_api.mount_container( + blob_service_client, container_name, create_container=True + ) + hash_value = 
await azure_storage_api.generate_hash(image_bytes) + await azure_storage_api.upload_image( + container_client, folder_name, image_bytes, hash_value + ) + + for idx, model in enumerate(pipelines_endpoints.get(pipeline_name)): + print(f"Entering {model.name.upper()} model") # TODO: Transform into logging + result_json = await model.request_function(model, cache_json_result[idx]) + cache_json_result.append(result_json) + + print("End of inference request") # TODO: Transform into logging + print("Process results") # TODO: Transform into logging + + processed_result_json = await inference.process_inference_results( + cache_json_result[-1], imageDims, area_ratio, color_format + ) + + result_json_string = json.dumps(processed_result_json) + + # upload the inference results to the user's container as async task + app.add_background_task( + azure_storage_api.upload_inference_result, + container_client, + folder_name, + result_json_string, + hash_value, + ) + # return the inference results to the client + print(f"Took: {'{:10.4f}'.format(time.perf_counter() - seconds)} seconds") # TODO: Transform into logging + return jsonify(processed_result_json), 200 + + except (KeyError, InferenceRequestError) as error: + print(error) + return jsonify(["InferenceRequestError: " + error.args[0]]), 400 - except InferenceRequestError as error: + except Exception as error: print(error) - return jsonify(["InferenceRequestError: " + str(error)]), 400 + return jsonify(["Unexpected error occured"]), 500 + +@app.get("/coffee") +async def get_coffee(): + return jsonify("Tea is great!"), 418 @app.get("/seed-data/") @@ -249,7 +271,7 @@ async def get_model_endpoints_metadata(): if CACHE['endpoints']: return jsonify(CACHE['endpoints']), 200 else: - return jsonify("Error retrieving model endpoints metadata.", 400) + return jsonify("Error retrieving model endpoints metadata.", 404) @app.get("/health") @@ -257,28 +279,116 @@ async def health(): return "ok", 200 +@app.get("/test") +async def test(): + # Build test pipeline + CACHE["endpoints"] = [ + { + "pipeline_name": "test_pipeline", + "models": ["test_model1"] + } + ] + # Built test model + m = Model( + request_function["test"], + "test_model1", + "http://localhost:8080/test_model1", + "test_api_key", + "application/json", + "test_platform" + ) + + CACHE["pipelines"]["test_pipeline"] = (m,) + + return CACHE["endpoints"], 200 + async def fetch_json(repo_URL, key, file_path): """ Fetches JSON document from a GitHub repository and caches it """ try: - json_url = os.path.join(repo_URL, file_path) - with urllib.request.urlopen(json_url) as response: - result = response.read() - result_json = json.loads(result.decode("utf-8")) - CACHE[key] = result_json + if key != "endpoints": + json_url = os.path.join(repo_URL, file_path) + with urllib.request.urlopen(json_url) as response: + result = response.read() + result_json = json.loads(result.decode("utf-8")) + return result_json + except urllib.error.HTTPError as error: - return jsonify({"error": f"Failed to retrieve the JSON. \ - HTTP Status Code: {error.code}"}), 400 + raise ValueError(str(error)) except Exception as e: - return jsonify({"error": str(e)}), 500 + raise ValueError(str(e)) + + +async def get_pipelines(): + """ + Retrieves the pipelines from the Azure storage API. + + Returns: + - list: A list of dictionaries representing the pipelines. 
+ """ + try: + app.config["BLOB_CLIENT"] = await azure_storage_api.get_blob_client(connection_string) + result_json = await azure_storage_api.get_pipeline_info(app.config["BLOB_CLIENT"], PIPELINE_BLOB_NAME, PIPELINE_VERSION) + cipher_suite = Fernet(FERNET_KEY) + except (ConnectionStringError, PipelineNotFoundError) as error: + print(error) + raise ServerError("server errror: could not retrieve the pipelines") from error + + models = () + for model in result_json.get("models"): + m = Model( + request_function.get(model.get("api_call_function")), + model.get("model_name"), + # To protect sensible data (API key and model endpoint), we encrypt it when + # it's pushed into the blob storage. Once we retrieve the data here in the + # backend, we need to decrypt the byte format to recover the original + # data. + cipher_suite.decrypt(model.get("endpoint").encode()).decode(), + cipher_suite.decrypt(model.get("api_key").encode()).decode(), + model.get("content_type"), + model.get("deployment_platform") + ) + models += (m,) + # Build the pipeline to call the models in order in the inference request + for pipeline in result_json.get("pipelines"): + CACHE["pipelines"][pipeline.get("pipeline_name")] = tuple([m for m in models if m.name in pipeline.get("models")]) + + return result_json.get("pipelines") @app.before_serving async def before_serving(): - await fetch_json(NACHET_DATA, 'seeds', "seeds/all.json") - await fetch_json(NACHET_MODEL, 'endpoints', 'model_endpoints_metadata.json') + try: + # Check: do environment variables exist? + if connection_string is None: + raise ServerError("Missing environment variable: NACHET_AZURE_STORAGE_CONNECTION_STRING") + + if FERNET_KEY is None: + raise ServerError("Missing environment variable: FERNET_KEY") + # Check: are environment variables correct? 
+ if not bool(re.match(connection_string_regex, connection_string)): + raise ServerError("Incorrect environment variable: NACHET_AZURE_STORAGE_CONNECTION_STRING") + + CACHE["seeds"] = await fetch_json(NACHET_DATA, "seeds", "seeds/all.json") + CACHE["endpoints"] = await get_pipelines() + + print( + f"""Server start with current configuration:\n + date: {date.today()} + file version of pipelines: {PIPELINE_VERSION} + pipelines: {[pipeline for pipeline in CACHE["pipelines"].keys()]}\n + """ + ) #TODO Transform into logging + + except ServerError as e: + print(e) + raise + + except Exception as e: + print(e) + raise ServerError("Failed to retrieve data from the repository") if __name__ == "__main__": app.run(debug=True, host="0.0.0.0", port=8080) diff --git a/azure_storage/azure_storage_api.py b/azure_storage/azure_storage_api.py index 2d27a3d9..ed434824 100644 --- a/azure_storage/azure_storage_api.py +++ b/azure_storage/azure_storage_api.py @@ -2,7 +2,8 @@ import uuid import hashlib import datetime -from azure.storage.blob import BlobServiceClient +from azure.storage.blob import BlobServiceClient, ContainerClient +from azure.core.exceptions import ResourceNotFoundError from custom_exceptions import ( ConnectionStringError, MountContainerError, @@ -13,23 +14,23 @@ FolderListError, GenerateHashError, CreateDirectoryError, + PipelineNotFoundError, ) """ ----- user-container based structure ----- -- container name is user id -- whenever a new user is created, a new container is created with the user uuid -- inside the container, there are project folders (project name = project uuid) -- for each project folder, there is a json file with the project info and creation -date, in the container -- inside the project folder, there is an image file and a json file with -the image inference results +---- user-container based structure ----- - container name is user id - whenever +a new user is created, a new container is created with the user uuid - inside +the container, there are project folders (project name = project uuid) - for +each project folder, there is a json file with the project info and creation +date, in the container - inside the project folder, there is an image file and a +json file with the image inference results """ async def generate_hash(image): """ - generates a hash value for the image to be used as the image name in the container + generates a hash value for the image to be used as the image name in the + container """ try: hash = hashlib.sha256(image).hexdigest() @@ -38,40 +39,52 @@ async def generate_hash(image): except GenerateHashError as error: print(error) - -async def mount_container(connection_string, container_uuid, create_container=True): +async def get_blob_client(connection_string: str): """ - given a connection string and a container name, mounts the container and - returns the container client as an object that can be used in other functions. 
- if a specified container doesnt exist, it creates one with the provided uuid, - if create_container is True + given a connection string, returns the blob client object """ try: blob_service_client = BlobServiceClient.from_connection_string( connection_string ) - if blob_service_client: - container_name = "user-{}".format(container_uuid) - container_client = blob_service_client.get_container_client(container_name) - if container_client.exists(): - return container_client - elif create_container and not container_client.exists(): - container_client = blob_service_client.create_container(container_name) - # create general directory for new user container - response = await create_folder(container_client, "General") - if response: - return container_client - else: - return False - else: - raise ConnectionStringError("Invalid connection string") + if blob_service_client is None: + raise ValueError(f"the given connection string is invalid: {connection_string}") + return blob_service_client + except ValueError as error: + print(error) + raise ConnectionStringError(error.args[0]) from error + + +async def mount_container( + blob_service_client: BlobServiceClient, + container_uuid: str, + create_container: bool =True): + """ + given a connection string and a container uuid, mounts the container and + returns the container client as an object that can be used in other + functions. if a specified container doesnt exist, it creates one with the + provided uuid, if create_container is True + """ + try: + container_name = "user-{}".format(container_uuid) + container_client = blob_service_client.get_container_client(container_name) + if container_client.exists(): + return container_client + elif create_container and not container_client.exists(): + container_client = blob_service_client.create_container(container_name) + # create general directory for new user container + response = await create_folder(container_client, "General") + if response: + return container_client + else: + raise MountContainerError(f"could not create general directory: {container_name}") except MountContainerError as error: print(error) - return False + raise -async def get_blob(container_client, blob_name): +async def get_blob(container_client: ContainerClient, blob_name: str): """ gets the contents of a specified blob in the user's container """ @@ -81,15 +94,15 @@ async def get_blob(container_client, blob_name): blob_content = blob.readall() return blob_content - except GetBlobError as error: - print(error) - return False + except ResourceNotFoundError as error: + raise GetBlobError( + f"the specified blob: {blob_name} cannot be found") from error -async def upload_image(container_client, folder_name, image, hash_value): +async def upload_image(container_client: ContainerClient, folder_name, image, hash_value): """ - uploads the image to the specified folder within the user's container, - if the specified folder doesnt exist, it creates it with a uuid + uploads the image to the specified folder within the user's container, if + the specified folder doesnt exist, it creates it with a uuid """ try: directories = await get_directories(container_client) @@ -148,8 +161,8 @@ async def create_folder(container_client, folder_name): async def upload_inference_result(container_client, folder_name, result, hash_value): """ - uploads the inference results json file to the specified folder - in the users container + uploads the inference results json file to the specified folder in the users + container """ try: folder_uuid = await 
get_folder_uuid(container_client, folder_name) @@ -166,8 +179,8 @@ async def upload_inference_result(container_client, folder_name, result, hash_va async def get_folder_uuid(container_client, folder_name): """ gets the uuid of a folder in the user's container given the folder name by - iterating through the folder json files and extracting the name - to match given folder name + iterating through the folder json files and extracting the name to match + given folder name """ try: blob_list = container_client.list_blobs() @@ -234,3 +247,37 @@ async def get_directories(container_client): except FolderListError as error: print(error) return [] + +async def get_pipeline_info( + blob_service_client: BlobServiceClient, + pipeline_container_name: str, + pipeline_version: str + ) -> json: + """ + Retrieves the pipeline information from Azure Blob Storage based on the + provided parameters. + + Args: + blob_service_client (BlobServiceClient): The BlobServiceClient object + pipeline_container_name (str): The name of the container where + the pipeline files are stored. + pipeline_version (str): The version of the pipeline to retrieve. + + Returns: + json: The pipeline information in JSON format. + + Raises: + PipelineNotFoundError: If the specified version of the pipeline is not + found. + """ + try: + container_client = blob_service_client.get_container_client( + pipeline_container_name + ) + + blob = await get_blob(container_client, f"pipelines/{pipeline_version}.json") + pipeline = json.loads(blob) + return pipeline + + except GetBlobError as error: + raise PipelineNotFoundError(f"This version {pipeline_version} was not found") from error diff --git a/custom_exceptions.py b/custom_exceptions.py index 7de693e7..3f9cd805 100644 --- a/custom_exceptions.py +++ b/custom_exceptions.py @@ -64,3 +64,7 @@ class ValidateEnvVariablesError(Exception): class ServerError(Exception): pass + + +class PipelineNotFoundError(Exception): + pass diff --git a/docs/nachet-inference-documentation.md b/docs/nachet-inference-documentation.md index 63bbedab..461090c2 100644 --- a/docs/nachet-inference-documentation.md +++ b/docs/nachet-inference-documentation.md @@ -19,11 +19,13 @@ selected by a parameter. ## Glossary ### Pipelines -Pipelines are defined as a set of models that follow each other, where the output of -one model is used as input for the next models, and so on. A pipeline contains from 1 to n -models. + +Pipelines are defined as a set of models that follow each other, where the +output of one model is used as input for the next models, and so on. A pipeline +contains from 1 to n models. #### Pipelines flowchart 1.0.0 + ```mermaid flowchart LR @@ -41,44 +43,46 @@ end ``` ### Models + A model is an AI model that is a part of a pipeline. A model accepts images as input and returns JSON as output. Generally, this JSON contains the coordinates of objects in the source image, that the model may pass along to feed the next step of the pipeline. - ### Model from Frontend + On the frontend interface, a pipeline will be called a model, because the user will not be aware of the difference. From the user's perspective, they send data to a model and receive the result. 
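To make the pipeline chaining described in the glossary above concrete, here is a minimal, self-contained sketch of how a pipeline's models could be called in order, each output feeding the next call. The loop mirrors the `inference_request` code in this PR; the `Model` tuple is reduced to the fields needed here, and the two model functions are placeholders rather than real endpoints:

```python
import asyncio
from collections import namedtuple

# Reduced version of the backend's Model namedtuple: only the fields used here.
Model = namedtuple("Model", ["request_function", "name"])

async def detector(model, previous_result):
    # Placeholder for an object-detection call; returns fake boxes.
    return {"boxes": [{"label": "seed"}], "source": previous_result}

async def classifier(model, previous_result):
    # Placeholder for a classification call; refines the previous model's output.
    previous_result["boxes"][0]["label"] = "seed_name"
    return previous_result

async def run_pipeline(pipeline, image_b64):
    # Each model receives the output of the previous one, as in inference_request().
    results = [image_b64]
    for idx, model in enumerate(pipeline):
        results.append(await model.request_function(model, results[idx]))
    return results[-1]

pipeline = (Model(detector, "seed-detector"), Model(classifier, "swin"))
print(asyncio.run(run_pipeline(pipeline, "<base64 image>")))
```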
*Suggestion: we could call the pipeline a method if we don't want to mix terms.* -# Sequence Diagram for inference request 1.0.0 +## Sequence Diagram for inference request 1.2.1 ```mermaid sequenceDiagram - + title: Sequence Diagram for inference request 1.2.1 actor Client participant Frontend participant Backend participant Blob storage participant Model + Backend-)+Backend: run() Note over Backend,Blob storage: initialisation Backend-)Backend: before_serving() - Backend-)Backend: get_pipelines_models() + Backend-)Backend: get_pipelines() alt - Backend-)Blob storage: HTTP POST req. - Blob storage--)Backend: return pipelines_models.json + Backend-)+Blob storage: HTTP POST req. + Blob storage--)-Backend: return pipelines_models.json else - Backend-)Frontend: error 400 No pipeline found + Backend-)Frontend: error 500 Failed to retrieve data from the repository end Note over Backend,Blob storage: end of initialisation - - Client->>Frontend: applicationStart() + + Client->>+Frontend: applicationStart() Frontend-)Backend: HTTP POST req. - Backend-)Backend: get_pipelines_names() + Backend-)Backend: get_model_endpoints_metadata() Backend--)Frontend: Pipelines names res. Note left of Backend: return pipelines names and metadata @@ -86,45 +90,199 @@ sequenceDiagram Client-->>Frontend: client ask action from specific pipeline Frontend-)Backend: HTTP POST req. Backend-)Backend: inference_request(pipeline_name, folder_name, container_name, imageDims, image) - alt missing argument - Backend--)Frontend: Error 400 missing arguments - else no missing argument - Backend-)Backend: mount_container(connection_string(Environnement Variable, container_name)) - Backend-)Blob storage: HTTP POST req. - Blob storage--)Backend: container_client - - Backend-)Backend: upload_image(container_client, folder_name, image_bytes, hash_value) - Backend-)Blob storage: HTTP POST req. - Blob storage--)Backend: blob_name - - Backend-)Backend: get_blob(container_client, blob_name) - Backend-)Blob storage: HTTP POST req. - Blob storage--)Backend: blob - - loop for every model in pipeline - note over Backend, Blob storage: Header construction - Note over Backend,Blob storage: {"Content-Type": "application/json",
"Authorization": ("Bearer " + endpoint_api_key),} - Backend-)Backend: urllib.request.Request(endpoint_url, body, header) - Backend-)Model: HTTP POST req. - Model--)Backend: Result res. - alt next model is not None - note over Backend, Blob storage: restart the loop process - Backend-)Backend: record_result(model, result) - Backend-)Blob storage: HTTP POST req. - note over Backend, Blob storage: record the result produced by the model - - end - end - - par Backend to Frontend - Backend-)Backend: inference.process_inference_results(data, imageDims) - Backend--)Frontend: Processed result res. - and Backend to Blob storage - Backend-)Backend: upload_inference_result(container_client, folder_name, result_json_string, hash_value) - Backend-)Blob storage: HTTP POST req. + alt missing arguments + Backend-)Frontend: Error 400 missing arguments + end + alt wrong pipeline name + Backend-)Frontend: Error 400 wrong pipeline name + end + alt wrong header + Backend-)Frontend: Error 400 wrong header on file + end + + Backend-)Backend: mount_container(connection_string(Environnement Variable, container_name)) + Backend-)+Blob storage: HTTP POST req. + Blob storage--)-Backend: container_client + + Backend-)Backend: Generate Hash(image_bytes) + + Backend-)Backend: upload_image(container_client, folder_name, image_bytes, hash_value) + Backend-)+Blob storage: HTTP POST req. + Blob storage--)-Backend: blob_name + + Backend-)Backend: get_blob(container_client, blob_name) + Backend-)+Blob storage: HTTP POST req. + Blob storage--)-Backend: blob + + loop for every model in pipeline + Backend-)Backend: model.entry_function(model, previous_result) + note over Backend, Blob storage: Every model has is own entry_function + Backend-)Backend: request_factory(previous_result, model) + Backend-)Backend: urllib.request.Request(endpoint_url, body, header) + Backend-)+Model: HTTP POST req. + Model--)-Backend: Result res. + alt if model has process_inference_function + Backend-) Backend: model.inference_function(previous_result, result_json) end end - Frontend--)Client: display result + note over Backend, Blob storage: End of the loop + par Backend to Frontend + Backend-)Backend: inference.process_inference_results(result_json, imageDims) + Backend--)Frontend: Processed result res. + Frontend--)-Client: display result + and Backend to Blob storage + note over Backend, Blob storage: record the result produced by the model + Backend-)Backend: upload_inference_result(container_client, folder_name, result_json_string, hash_value) + Backend-)-Blob storage: HTTP POST req. + end ``` ![footer_for_diagram](https://github.com/ai-cfia/nachet-backend/assets/96267006/cf378d6f-5b20-4e1d-8665-2ba65ed54f8e) + +### Inference Request function + +The inference request function plays a crucial role in Nachet Interactive's +backend. It requests actions from selected models or pipelines based on certain +checks. These checks include verifying that all arguments required to find or +initialize the blob container and process the image have been transmitted to the +function. It also checks if the selected pipeline is recognized by the system +and if the image sent for analysis has a valid header. + +If all the above checks pass, the function initializes or finds the user blob +container and uploads the image. Next, it requests an inference from every model +in the pipeline. Each model specifies their `entry_function` (how to call and +retrieve data) and whether they have a `process_inference` function. 
Based on +these indications, the results are returned and stored in the cache. + +If no other model is called, the last result is then processed and sent to the frontend. + +### Input and Output for inference request + +The inference request will process the following parameters: + +|Key parameters | Expected Value| +|--|--| +|model_name | The name of the pipeline| +|folder_name | The folder where the image is uploaded in the user's container| +|container_name | The user's container| +|imageDims | The dimension of the image| +|image | The image encoded in b64 (ASCII)| + +Note that since the information is received from the frontend, the model_name is +an abstraction for a pipeline. + +The inference request will return a list with the following information: +|key parameters | hierarchy Levels | Return Value | +|--|--|--| +|Filename| 0 | Contains the filename of the image| +|Boxes | 0 | Contains all the boxes returned by the inference request| +|labelOccurence | 0 | Contains the number of label occurence| +|totalBoxes | 0 | Boxes total number| +|Box | 1 | Contains all the information of one seed in the image| +|label | 1 | Contains the top label for the seed| +|score | 1 | Contains the top score for the seed| +|topN | 1 | Contains the top N scores for the seed| +|overlapping | 1 | Contains a boolean to tell if the box overlap with another one| +|overlappingIndices | 1 | Contains the index of the overlapping box| +|topX | 2 | The top x value of the box around a seed| +|topY | 2 | The top y value of the box around a seed| +|bottomX | 2 | The bottom x value of the box around a seed| +|bottomY| 2 | The bottom y value of the box around a seed| + +*for more look at [nachet-model-documentation](https://github.com/ai-cfia/nachet-backend/blob/51-implementing-2-models/docs/nachet-model-documentation.md#return-value-of-models)* + +**topN** contains the top 5 predictions of the models: + +```json +"topN": [ + { + "label": "seed_name", + "score": 0.75 + } + { + "label": "seed_name", + "score": 0.18 + } + { + "label": "seed_name", + "score": 0.05 + } + { + "label": "seed_name", + "score": 0.019 + } + { + "label": "seed_name", + "score": 0.001 + } +] +``` + +### Blob storage and Pipeline versioning + +To keep track of the various pipeline iterations and versions, JSON files are +stored in the blob storage. Users can add the JSON to the blob storage +using the `pipelines_version_insertion.py` script. This allows for easy +management of model and pipeline history. + +To use the script, 3 environment variables are necessary: + +* NACHET_BLOB_PIPELINE_NAME + * Containing the blob name where the pipelines are stored +* NACHET_BLOB_PIPELINE_VERSION + * Containing the version the user wants to select +* NACHET_BLOB_PIPELINE_DECRYPTION_KEY + * The key to decrypt sensible data such as the API key and the endpoint of a model. + +#### In the code + +In the backend, the pipelines are retrieved using the `get_pipelines` function. +This function retrieves the data from the blob storage and stores the pipeline in +the `CACHE["endpoint"]` variable. This is the variable that feeds the `models` +information and metadata to the frontend. + +```python +async def get_pipelines(): + """ + Retrieves the pipelines from the Azure storage API. + + Returns: + - list: A list of dictionaries representing the pipelines. 
+ """ + try: + app.config["BLOB_CLIENT"] = await azure_storage_api.get_blob_client(connection_string) + result_json = await azure_storage_api.get_pipeline_info(app.config["BLOB_CLIENT"], PIPELINE_BLOB_NAME, PIPELINE_VERSION) + cipher_suite = Fernet(FERNET_KEY) + except (ConnectionStringError, PipelineNotFoundError) as error: + print(error) + raise ServerError("server errror: could not retrieve the pipelines") from error + + models = () + for model in result_json.get("models"): + m = Model( + request_function.get(model.get("api_call_function")), + model.get("model_name"), + # To protect sensible data (API key and model endpoint), we encrypt it when + # it's pushed into the blob storage. Once we retrieve the data here in the + # backend, we need to decrypt the byte format to recover the original + # data. + cipher_suite.decrypt(model.get("endpoint").encode()).decode(), + cipher_suite.decrypt(model.get("api_key").encode()).decode(), + model.get("content_type"), + model.get("deployment_platform") + ) + models += (m,) + # Build the pipeline to call the models in order in the inference request + for pipeline in result_json.get("pipelines"): + CACHE["pipelines"][pipeline.get("pipeline_name")] = tuple([m for m in models if m.name in pipeline.get("models")]) + + return result_json.get("pipelines") +``` + +### Available Version of the JSON file + +|Version|Creation Date| Pipelines| +|--|--|--| +|0.1.3 | 2024-03-26 | Swin Transformer and 6 Seeds Detector| +|0.1.0 | 2024-02-26 | Swin Transformer and 6 Seeds Detector| +|0.1.1 | 2024-03-14 | Swin Transformer and 6 Seeds Detector| diff --git a/docs/nachet-model-documentation.md b/docs/nachet-model-documentation.md new file mode 100644 index 00000000..55063d17 --- /dev/null +++ b/docs/nachet-model-documentation.md @@ -0,0 +1,201 @@ +# Nachet Interactive Models + +## Executive Summary + +Nachet Interactive uses various models to detect seeds. Documentation is +essential to keep track of their features. The models can perform different +tasks, including Image Classification, Image Segmentation, and Object Detection. + +## Task + +Nachet Interactive's models perfom the following tasks: + +|Task|Action|Input/Output| +|:--|:--|:-----| +|[Classification](https://huggingface.co/tasks/image-classification) | This task involves assigning a single label or class to each image the model receives. | The input for the classification models is an image, and the output is a prediction of the class it belongs to. | +|[Object Detection](https://huggingface.co/tasks/object-detection) | Identify and locate an object belonging to a specific class within an image. |The object detection models take an image as an input and output the image with a label and a box around the detected object. | +|[Segmentation](https://huggingface.co/tasks/image-segmentation) | Segmentation is the task of dividing images into different parts, where each pixel in the image is mapped to an object. It includes instance segmentation, panoptic segmentation, and semantic segmentation.| The segmentation models take an image as input and return an image divided into objects. | + +> As of today (2024-02-22), no model uses segmentation. To know more about each +> task, click on the task to follow the link to their hugging face page. 
To know +> more about AI tasks in general: [Hugging Face +> Tasks](https://huggingface.co/tasks) + +## List of models + +|Model|Full name|Task|API Call Function|Inference Function|Active|Accuracy| +|--|--|:--:|:--:|:--:|:--:|:--:| +|Nachet-6seeds | m-14of15seeds-6seedsmag | Object Detection | nachet_6seeds | None | Yes | - | +|Seed-detector | seed-detector-1 | Object Detection | seed_detector | process_image_slicing | Yes | - | +|Swin | swinv1-base-dataaugv2-1 | Classification | swin | process_swin_result | Yes | - | + +### Request Inference Function + +The request inference functions request a prediction from the specified model +(such as Swin, Nachet-6seeds, etc.). If needed, the function will process the +data to be readable by the next model in the pipeline. For instance, the +Seed-detector only returns "seed" as a label, and its inference needs to be +processed and passed to the next model which assigns the correct label to the +seeds. + +## Return value of models + +```json +{ + "filename": "tmp/tmp_file_name", + "boxes": [ + {"box": { + "topX": 0.0, + "topY": 0.0, + "bottomX": 0.0, + "bottomY": 0.0 + }, + "label": "top_label_name", + "score": 0.912, + "color": "#ff0", + "topN": [ + { + "score": 0.912, + "label": "top_label_name", + }, + { + "score": 0.053, + "label": "seed_name", + }, + { + "score": 0.0029, + "label": "seed_name", + }, + { + "score": 0.005, + "label": "seed_name", + }, + { + "score": 0.001, + "label": "seed_name", + } + ], + "overlapping": false, + "overlappingIndices": 0 + }, + ], + "labelOccurrence": { + "seed_name": 1, + }, + "totalBoxes": 1 +} +``` + +### Why topN + +We decided to named the top results property top N because this value can return +n predictions. Usually in AI, the top 5 result are use to measure the accuracy +of a model. If the correct result is the top 5, then it is considered that the +prediction was true. + +This is useful in case were the user have is attention on more then 1 result. + + > "Top N accuracy — Top N accuracy is when you measure how often your predicted + > class falls in the top N values of your softmax distribution." + [Nagda, R. (2019-11-08) *Evaluating models using the Top N accuracy metrics*. Medium](https://medium.com/nanonets/evaluating-models-using-the-top-n-accuracy-metrics-c0355b36f91b) + +### Box around seed + +The `box` key stores the value for a specific box around a seed. This helps the +frontend application build a red rectangle around every seed on the image. + +![image](https://github.com/ai-cfia/nachet-backend/assets/96267006/469add8d-f40a-483f-b090-0ebcb7a8396b) + +## Different ways of calling models + +### Header + +The endpoint can host multiple models, so specifying the model name in the +header is necessary to avoid errors. + +```python +# Header for every model should be: +headers = { + 'Content-Type': 'application/json', + 'Authorization': ('Bearer ' + endpoint_api_key), + 'azureml-model-deployment': model_name +} +``` + +### Body + +The body structure difference is based on the model tasks. A classification +model can only classify one seed in an image, whereas an object detection model +can detect if the image contains one or multiple seeds. It remains to be +determined whether a segmentation model requires a different body structure. 
+[See task](#task) + +```python +# Object Detection model +# Example: Nachet-6seeds and seed-detector +body = { + 'input_data': { + 'columns': ['image'], + 'index': [0], + 'data': [image_bytes], + } +} + +# Classification model +# Example: Swin +body = b64encode(image) +``` + +## Error from models + +A list of common error models returns to the backend. + +> To access the error from the model, go to the model endpoint in azure and look +> for the logs : CFIA/ACIA/workspace/endpoint/model/logs + +|Error|Model|Reason|Message| +|--|--|--|--| +|ValueError| Swin |Incorrect image source|Must be a valid url starting with `http://` or `https://`, a valid path to an image file, or a base64 encoded string| + +## Pipeline and model data + +In order to dynamically build the pipeline in the backend from the model, the +following data structure was designed. For now, the pipelines will have two keys +for their names (`model_name`, `piepline_name`) to support the frontend code +until it is changed to get the name of the pipeline with the correct key. + +```yaml +version: +date: +pipelines: + - models: + model_name: + pipeline_name: + created_by: + creation_date: + version: + description: + job_name: + dataset: + metrics: + identifiable: + +models: + - task: + api_call_function: + endpoint: + api_key: + inference_function: + content-type: + deployment_platform: + endpoint_name: + model_name: + created_by: + creation_date: + version: + description: + job_name: + dataset: + metrics: + identifiable: +``` diff --git a/model/__init__.py b/model/__init__.py index e69de29b..94b8db46 100644 --- a/model/__init__.py +++ b/model/__init__.py @@ -0,0 +1,11 @@ +from model.swin import request_inference_from_swin +from model.seed_detector import request_inference_from_seed_detector +from model.test import request_inference_from_test +from model.six_seeds import request_inference_from_nachet_6seeds + +request_function = { + "swin": request_inference_from_swin, + "seed_detector": request_inference_from_seed_detector, + "test": request_inference_from_test, + "nachet_6seeds": request_inference_from_nachet_6seeds +} diff --git a/model/seed_detector.py b/model/seed_detector.py new file mode 100644 index 00000000..2c4f124f --- /dev/null +++ b/model/seed_detector.py @@ -0,0 +1,103 @@ +""" +This file contains the function that requests the inference and processes the data from +the seed detector model. +""" + +import io +import base64 +import json + +from PIL import Image +from collections import namedtuple +from urllib.request import Request, urlopen, HTTPError +from custom_exceptions import InferenceRequestError + +def process_image_slicing(image_bytes: bytes, result_json: dict) -> list: + """ + This function takes the image bytes and the result_json from the model and + returns a list of cropped images. 
+ The result_json is expected to be in the following format: + { + "boxes": [ + { + "box": { + "topX": 0.0, + "topY": 0.0, + "bottomX": 0.0, + "bottomY": 0.0 + }, + "label": "string", + "score": 0.0 + } + ], + } + """ + boxes = result_json[0]['boxes'] + image_io_byte = io.BytesIO(base64.b64decode(image_bytes)) + image_io_byte.seek(0) + image = Image.open(image_io_byte) + + format = image.format + + cropped_images = [bytes(0) for _ in boxes] + + for i, box in enumerate(boxes): + topX = int(box['box']['topX'] * image.width) + topY = int(box['box']['topY'] * image.height) + bottomX = int(box['box']['bottomX'] * image.width) + bottomY = int(box['box']['bottomY'] * image.height) + + img = image.crop((topX, topY, bottomX, bottomY)) + + buffered = io.BytesIO() + img.save(buffered, format) + + cropped_images[i] = base64.b64encode(buffered.getvalue()) + + return cropped_images + + +async def request_inference_from_seed_detector(model: namedtuple, previous_result: str): + """ + Requests inference from the seed detector model using the previously provided result. + + Args: + model (namedtuple): The seed detector model. + previous_result (str): The previous result used for inference. + + Returns: + dict: A dictionary containing the result JSON and the images generated from the inference. + + Raises: + InferenceRequestError: If an error occurs while processing the request. + """ + try: + + headers = { + "Content-Type": model.content_type, + "Authorization": ("Bearer " + model.api_key), + model.deployment_platform: model.name + } + + data = { + "input_data": { + "columns": ["image"], + "index": [0], + "data": [previous_result], + } + } + + body = str.encode(json.dumps(data)) + req = Request(model.endpoint, body, headers) + response = urlopen(req) + + result = response.read() + result_object = json.loads(result.decode("utf8")) + print(json.dumps(result_object[0].get("boxes"), indent=4)) #TODO Transform into logging + + return { + "result_json": result_object, + "images": process_image_slicing(previous_result, result_object) + } + except HTTPError as e: + raise InferenceRequestError(f"An error occurred while processing the request:\n {str(e)}") from None diff --git a/model/six_seeds.py b/model/six_seeds.py new file mode 100644 index 00000000..2334a3d4 --- /dev/null +++ b/model/six_seeds.py @@ -0,0 +1,51 @@ +""" +This file contains the function that requests the inference and processes the data from +the nachet-6seeds model. +""" + +import json +from collections import namedtuple +from urllib.request import Request, urlopen, HTTPError +from custom_exceptions import InferenceRequestError + +async def request_inference_from_nachet_6seeds(model: namedtuple, previous_result: str): + """ + Requests inference from the Nachet Six Seed model. + + Args: + model (namedtuple): The model to use for inference. + previous_result (str): The previous result to pass to the model. + + Returns: + dict: The result of the inference as a JSON object. + + Raises: + InferenceRequestError: If an error occurs while processing the request. 
+ """ + try: + headers = { + "Content-Type": model.content_type, + "Authorization": ("Bearer " + model.api_key), + model.deployment_platform: model.name + } + + data = { + "input_data": { + "columns": ["image"], + "index": [0], + "data": [previous_result], + } + } + body = str.encode(json.dumps(data)) + + req = Request(model.endpoint, body, headers) + response = urlopen(req) + result = response.read() + result_object = json.loads(result.decode("utf8")) + + print(json.dumps(result_object[0].get("boxes"), indent=4)) #TODO Transform into logging + + return result_object + + except HTTPError as e: + raise InferenceRequestError(f"An error occurred while processing the request:\n {str(e)}") from None diff --git a/model/swin.py b/model/swin.py new file mode 100644 index 00000000..2914b65e --- /dev/null +++ b/model/swin.py @@ -0,0 +1,62 @@ +""" +This file contains the function that requests the inference and processes the data from +the swin model. +""" + +import json + +from collections import namedtuple +from urllib.request import Request, urlopen, HTTPError +from custom_exceptions import InferenceRequestError + + +def process_swin_result(img_box:dict, results: dict) -> list: + """ + Args: + img_box (dict): The image box containing the bounding boxes and labels. + results (dict): The results from the model containing the detected seeds. + + Returns: + list: The updated image box with modified labels and scores. + """ + for i, result in enumerate(results): + img_box[0]['boxes'][i]['label'] = result[0].get("label") + img_box[0]['boxes'][i]['score'] = result[0].get("score") + img_box[0]['boxes'][i]["topN"] = [d for d in result] + + return img_box + + +async def request_inference_from_swin(model: namedtuple, previous_result: list[bytes]): + """ + Perform inference using the SWIN model on a list of images. + + Args: + model (namedtuple): The SWIN model to use for inference. + previous_result (list[bytes]): The previous result containing the images to perform inference on. + + Returns: + The result of the inference. + + Raises: + InferenceRequestError: If an error occurs while processing the request. + """ + try: + results = [] + for img in previous_result.get("images"): + headers = { + "Content-Type": model.content_type, + "Authorization": ("Bearer " + model.api_key), + model.deployment_platform: model.name + } + body = img + req = Request(model.endpoint, body, headers) + response = urlopen(req) + result = response.read() + results.append(json.loads(result.decode("utf8"))) + + print(json.dumps(results, indent=4)) #TODO Transform into logging + + return process_swin_result(previous_result.get("result_json"), results) + except HTTPError as e: + raise InferenceRequestError(f"An error occurred while processing the request:\n {str(e)}") from None diff --git a/model/test.py b/model/test.py new file mode 100644 index 00000000..3ca8b6f1 --- /dev/null +++ b/model/test.py @@ -0,0 +1,56 @@ +""" +This module contains functions for performing inference using different models. + +Functions: + request_inference_from_swin: Perform inference using the SWIN model on a list of images. + request_inference_from_seed_detector: Requests inference from the seed detector model using the provided previous result. + request_inference_from_nachet_six_seed: Requests inference from the Nachet Six Seed model. +""" +from collections import namedtuple +from custom_exceptions import InferenceRequestError + + +async def request_inference_from_test(model: namedtuple, previous_result: str): + """ + Requests a test case inference. 
+
+    Args:
+        model (namedtuple): The model to use for the test inference.
+        previous_result (str): The previous result to pass to the model.
+
+    Returns:
+        list: The result of the inference as a JSON object.
+
+    Raises:
+        InferenceRequestError: If an error occurs while processing the request.
+    """
+    try:
+        if previous_result == '':
+            raise ValueError("The result sent to the inference function is empty")
+        print(f"processing test request for {model.name} with {type(previous_result)} arguments")
+        return [
+            {
+                "filename": "test_image.jpg",
+                "boxes": [
+                    {
+                        "box": {
+                            "topX": 0.078,
+                            "topY": 0.068,
+                            "bottomX": 0.86,
+                            "bottomY": 0.56
+                        },
+                        "label": "test_label",
+                        "score": 1.0,
+                        "topN": [
+                            {
+                                "label": "test_label",
+                                "score": 1.0,
+                            },
+                        ],
+                    }
+                ]
+            }
+        ]
+
+    except ValueError as error:
+        raise InferenceRequestError("An error occurred while processing the request") from error
diff --git a/requirements.txt b/requirements.txt
index 5ef34973..7444593d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,5 @@ quart
 quart-cors
 python-dotenv
 hypercorn
+Pillow
+cryptography
diff --git a/run_tests.py b/run_tests.py
deleted file mode 100644
index cdecaf77..00000000
--- a/run_tests.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from tests.test_azure_storage_api import TestMountContainerFunction, TestGetBlob
-
-a = TestMountContainerFunction()
-a.test_mount_existing_container()
-a.test_mount_nonexisting_container_create()
-a.test_mount_nonexisting_container_no_create()
-
-b = TestGetBlob()
-b.test_get_blob_successful()
-b.test_get_blob_unsuccessful()
diff --git a/tests/1310_1.png b/tests/1310_1.png
new file mode 100644
index 00000000..7f8a8396
Binary files /dev/null and b/tests/1310_1.png differ
diff --git a/tests/test_azure_storage_api.py b/tests/test_azure_storage_api.py
index 5140dd05..2ee52156 100644
--- a/tests/test_azure_storage_api.py
+++ b/tests/test_azure_storage_api.py
@@ -1,15 +1,43 @@
+import json
 import unittest
+import asyncio
 from unittest.mock import patch, Mock, MagicMock
 from azure_storage.azure_storage_api import (
     mount_container,
     get_blob,
+    get_pipeline_info,
+    get_blob_client
 )
+
+from azure.core.exceptions import ResourceNotFoundError
+
 from custom_exceptions import (
     GetBlobError,
+    PipelineNotFoundError,
+    ConnectionStringError
 )
-import asyncio
 
+class TestGetBlobServiceClient(unittest.TestCase):
+    @patch("azure.storage.blob.BlobServiceClient.from_connection_string")
+    def test_get_blob_service_successful(self, MockFromConnectionString):
+        mock_blob_service_client = MockFromConnectionString.return_value
+
+        result = asyncio.run(
+            get_blob_client("connection_string")
+        )
+
+        print(result == mock_blob_service_client)
+
+        self.assertEqual(result, mock_blob_service_client)
+
+    @patch("azure.storage.blob.BlobServiceClient.from_connection_string")
+    def test_get_blob_service_unsuccessful(self, MockFromConnectionString):
+        MockFromConnectionString.return_value = None
+        with self.assertRaises(ConnectionStringError) as context:
+            asyncio.run(get_blob_client("invalid_connection_string"))
+
+        print(str(context.exception) == "the given connection string is invalid: invalid_connection_string")
 
 class TestMountContainerFunction(unittest.TestCase):
     @patch("azure.storage.blob.BlobServiceClient.from_connection_string")
@@ -22,14 +50,10 @@ def test_mount_existing_container(self, MockFromConnectionString):
         mock_blob_service_client.get_container_client.return_value = (
             mock_container_client
         )
-
-        connection_string = "test_connection_string"
         container_name = "testcontainer"
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        result = loop.run_until_complete(
-            mount_container(connection_string, container_name)
+        result = asyncio.run(
+            mount_container(mock_blob_service_client, container_name)
         )
 
         print(result == mock_container_client)
 
@@ -57,14 +81,11 @@ def test_mount_nonexisting_container_create(self, MockFromConnectionString):
             mock_new_container_client
         )
 
-        connection_string = "test_connection_string"
         container_name = "testcontainer"
         expected_container_name = "user-{}".format(container_name)
 
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        result = loop.run_until_complete(
-            mount_container(connection_string, container_name, create_container=True)
+        result = asyncio.run(
+            mount_container(mock_blob_service_client, container_name, create_container=True)
         )
 
         mock_blob_service_client.create_container.assert_called_once_with(
@@ -83,13 +104,10 @@ def test_mount_nonexisting_container_no_create(self, MockFromConnectionString):
             mock_container_client
         )
 
-        connection_string = "test_connection_string"
         container_name = "testcontainer"
 
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        result = loop.run_until_complete(
-            mount_container(connection_string, container_name, create_container=False)
+        result = asyncio.run(
+            mount_container(mock_blob_service_client, container_name, create_container=False)
         )
 
         mock_blob_service_client.create_container.assert_not_called()
@@ -98,8 +116,7 @@
 
 
 class TestGetBlob(unittest.TestCase):
-    @patch("azure.storage.blob.BlobServiceClient.from_connection_string")
-    def test_get_blob_successful(self, MockFromConnectionString):
+    def test_get_blob_successful(self):
         mock_blob_name = "test_blob"
         mock_blob_content = b"blob content"
 
@@ -112,9 +129,7 @@ def test_get_blob_successful(self, MockFromConnectionString):
         mock_container_client = Mock()
         mock_container_client.get_blob_client.return_value = mock_blob_client
 
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        result = loop.run_until_complete(
+        result = asyncio.run(
             get_blob(mock_container_client, mock_blob_name)
         )
 
@@ -122,28 +137,71 @@ def test_get_blob_successful(self, MockFromConnectionString):
 
         self.assertEqual(result, mock_blob_content)
+
+    def test_get_blob_unsuccessful(self):
+        blob = "nonexisting_blob"
+
+        mock_blob_client = Mock()
+        mock_blob_client.download_blob.side_effect = ResourceNotFoundError("Resource not found")
+
+        mock_container_client = Mock()
+        mock_container_client.get_blob_client.return_value = mock_blob_client
+
+        with self.assertRaises(GetBlobError) as context:
+            asyncio.run(get_blob(mock_container_client, blob))
+        print(str(context.exception) == f"the specified blob: {blob} cannot be found")
+
+
+class TestGetPipeline(unittest.TestCase):
     @patch("azure.storage.blob.BlobServiceClient.from_connection_string")
-    def test_get_blob_unsuccessful(self, MockFromConnectionString):
-        mock_blob_content = b"blob content"
+    def test_get_pipeline_info_successful(self, MockFromConnectionString):
+
+        mock_blob_content = b'''{
+            "name": "test_blob.json",
+            "version": "v1"
+        }'''
 
         mock_blob = Mock()
         mock_blob.readall.return_value = mock_blob_content
 
         mock_blob_client = Mock()
-        mock_blob_client.download_blob.side_effect = GetBlobError("Blob not found")
+        mock_blob_client.configure_mock(name="test_blob.json")
+        mock_blob_client.download_blob.return_value = mock_blob
+
+        mock_container_client = MagicMock()
+        mock_container_client.list_blobs.return_value = [mock_blob_client]
+        mock_container_client.get_blob_client.return_value = mock_blob_client
+
+        mock_blob_service_client = MockFromConnectionString.return_value
+        mock_blob_service_client.get_container_client.return_value = (
+            mock_container_client
+        )
+
+        result = asyncio.run(get_pipeline_info(mock_blob_service_client, "test_blob", "v1"))
+
+        print(result == json.loads(mock_blob_content))
+
+        self.assertEqual(result, json.loads(mock_blob_content))
+
+
+    @patch("azure.storage.blob.BlobServiceClient.from_connection_string")
+    def test_get_pipeline_info_unsuccessful(self, MockFromConnectionString):
+        pipeline_version = "v1"
+
+        mock_blob_client = Mock()
+        mock_blob_client.download_blob.side_effect = ResourceNotFoundError("Resource not found")
 
         mock_container_client = Mock()
         mock_container_client.get_blob_client.return_value = mock_blob_client
 
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        result = loop.run_until_complete(
-            get_blob(mock_container_client, "nonexisting_blob")
+        mock_blob_service_client = MockFromConnectionString.return_value
+        mock_blob_service_client.get_container_client.return_value = (
+            mock_container_client
         )
 
-        print(result is False)
+        with self.assertRaises(PipelineNotFoundError) as context:
+            asyncio.run(get_pipeline_info(mock_blob_service_client, "test_blob", pipeline_version))
 
-        self.assertEqual(result, False)
+        print(str(context.exception) == f"This version {pipeline_version} was not found")
 
 
 if __name__ == "__main__":
diff --git a/tests/test_health_request.py b/tests/test_health_request.py
new file mode 100644
index 00000000..7f0d95e0
--- /dev/null
+++ b/tests/test_health_request.py
@@ -0,0 +1,16 @@
+import unittest
+import asyncio
+
+from app import app
+
+class TestQuartHealth(unittest.TestCase):
+    def test_health(self):
+        test = app.test_client()
+
+        response = asyncio.run(
+            test.get('/health')
+        )
+        self.assertEqual(response.status_code, 200)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_inference_request.py b/tests/test_inference_request.py
new file mode 100644
index 00000000..8ca35760
--- /dev/null
+++ b/tests/test_inference_request.py
@@ -0,0 +1,236 @@
+import unittest
+import json
+import os
+import base64
+import asyncio
+
+from app import app
+from unittest.mock import patch, MagicMock, Mock
+
+class TestInferenceRequest(unittest.TestCase):
+    def setUp(self) -> None:
+        """
+        Set up the test environment before running each test case.
+        """
+        # Start the test pipeline
+        self.test = app.test_client()
+        response = asyncio.run(
+            self.test.get("/test")
+        )
+        self.pipeline = json.loads(asyncio.run(response.get_data()))[0]
+        current_dir = os.path.dirname(__file__)
+        image_path = os.path.join(current_dir, '1310_1.png')
+        self.endpoints = "/model-endpoints-metadata"
+        self.inference = "/inf"
+        self.container_name = "bab1da84-5937-4016-965e-67e1ea6e29c4"
+        self.folder_name = "test_folder"
+        self.image_header = "data:image/PNG;base64,"
+        with open(image_path, 'rb') as image_file:
+            self.image_src = base64.b64encode(image_file.read()).decode('utf-8')
+
+    def tearDown(self) -> None:
+        """
+        Tear down the test environment at the end of each test case.
+        """
+        self.image_src = None
+        self.test = None
+
+    @patch("azure_storage.azure_storage_api.mount_container")
+    def test_inference_request_successful(self, mock_container):
+        # Mock azure client services
+        mock_blob = Mock()
+        mock_blob.readall.return_value = bytes(self.image_src, encoding="utf-8")
+
+        mock_blob_client = Mock()
+        mock_blob_client.configure_mock(name="test_blob.json")
+        mock_blob_client.download_blob.return_value = mock_blob
+
+        mock_container_client = MagicMock()
+        mock_container_client.list_blobs.return_value = [mock_blob_client]
+        mock_container_client.get_blob_client.return_value = mock_blob_client
+        mock_container_client.exists.return_value = True
+
+        mock_container.return_value = mock_container_client
+        # Build expected response keys
+        responses = set()
+        expected_keys = {
+            "filename",
+            "boxes",
+            "labelOccurrence",
+            "totalBoxes",
+            "box",
+            "label",
+            "color",
+            "score",
+            "topN",
+            "overlapping",
+            "overlappingIndices"
+        }
+
+        # Test the answers from inference_request
+        response = asyncio.run(
+            self.test.post(
+                '/inf',
+                headers={
+                    "Content-Type": "application/json",
+                    "Access-Control-Allow-Origin": "*",
+                },
+                json={
+                    "image": self.image_header + self.image_src,
+                    "imageDims": [720,540],
+                    "folder_name": self.folder_name,
+                    "container_name": self.container_name,
+                    "model_name": self.pipeline.get("pipeline_name")
+                })
+        )
+
+        result_json = json.loads(asyncio.run(response.get_data()))[0]
+        keys = set(result_json.keys())
+        keys.update(result_json["boxes"][0].keys())
+        responses.update(keys)
+
+        print(expected_keys == responses)
+        self.assertEqual(responses, expected_keys)
+
+    @patch("azure_storage.azure_storage_api.mount_container")
+    def test_inference_request_unsuccessful(self, mock_container):
+        # Mock azure client services
+        mock_blob = Mock()
+        mock_blob.readall.return_value = b""
+
+        mock_blob_client = Mock()
+        mock_blob_client.configure_mock(name="test_blob.json")
+        mock_blob_client.download_blob.return_value = mock_blob
+
+        mock_container_client = MagicMock()
+        mock_container_client.list_blobs.return_value = [mock_blob_client]
+        mock_container_client.get_blob_client.return_value = mock_blob_client
+        mock_container_client.exists.return_value = True
+
+        mock_container.return_value = mock_container_client
+
+        # Build expected response
+        expected = 400
+
+        # Test the answers from inference_request
+        response = asyncio.run(
+            self.test.post(
+                '/inf',
+                headers={
+                    "Content-Type": "application/json",
+                    "Access-Control-Allow-Origin": "*",
+                },
+                json={
+                    "image": self.image_header,
+                    "imageDims": [720,540],
+                    "folder_name": self.folder_name,
+                    "container_name": self.container_name,
+                    "model_name": self.pipeline.get("pipeline_name")
+                })
+        )
+
+        print(expected == response.status_code)
+        self.assertEqual(response.status_code, expected)
+
+    def test_inference_request_missing_argument(self):
+        # Build expected response
+        responses = []
+        expected = ("InferenceRequestError: missing request arguments: either folder_name, container_name, imageDims or image is missing")
+
+        data = {
+            "image": self.image_header,
+            "imageDims": [720,540],
+            "folder_name": self.folder_name,
+            "container_name": self.container_name,
+            "model_name": self.pipeline.get("pipeline_name")
+        }
+
+        # Test the answers from inference_request
+
+        for k, v in data.items():
+            if k != "model_name":
+                data[k] = ""
+                response = asyncio.run(
+                    self.test.post(
+                        '/inf',
+                        headers={
+                            "Content-Type": "application/json",
+                            "Access-Control-Allow-Origin": "*",
+                        },
+                        json=data
+                    )
+                )
+                result_json = json.loads(asyncio.run(response.get_data()))
+                if len(responses) == 0:
+                    responses.append(result_json[0])
+                if responses[0] != result_json[0]:
+                    responses.append(result_json[0])
+                data[k] = v
+
+        if len(responses) > 1:
+            raise ValueError(f"Different error messages were given; expected only 'missing request arguments', {responses}")
+        print(expected == result_json[0])
+        print(response.status_code == 400)
+        self.assertEqual(result_json[0], expected)
+        self.assertEqual(response.status_code, 400)
+
+    def test_inference_request_wrong_pipeline_name(self):
+        # Build expected response
+        expected = ("InferenceRequestError: model wrong_pipeline_name not found")
+
+        # Test the answers from inference_request
+        response = asyncio.run(
+            self.test.post(
+                '/inf',
+                headers={
+                    "Content-Type": "application/json",
+                    "Access-Control-Allow-Origin": "*",
+                },
+                json={
+                    "image": self.image_src,
+                    "imageDims": [720,540],
+                    "folder_name": self.folder_name,
+                    "container_name": self.container_name,
+                    "model_name": "wrong_pipeline_name"
+                }
+            )
+        )
+        result_json = json.loads(asyncio.run(response.get_data()))
+
+        print(expected == result_json[0])
+        print(response.status_code == 400)
+
+        self.assertEqual(result_json[0], expected)
+        self.assertEqual(response.status_code, 400)
+
+    def test_inference_request_wrong_header(self):
+        # Build expected response
+        expected = ("InferenceRequestError: invalid image header")
+
+        # Test the answers from inference_request
+        response = asyncio.run(
+            self.test.post(
+                '/inf',
+                headers={
+                    "Content-Type": "application/json",
+                    "Access-Control-Allow-Origin": "*",
+                },
+                json={
+                    "image": "data:python," + self.image_src,
+                    "imageDims": [720,540],
+                    "folder_name": self.folder_name,
+                    "container_name": self.container_name,
+                    "model_name": self.pipeline.get("pipeline_name")
+                }
+            )
+        )
+        result_json = json.loads(asyncio.run(response.get_data()))
+
+        print(expected == result_json[0])
+        print(response.status_code == 400)
+
+        self.assertEqual(result_json[0], expected)
+        self.assertEqual(response.status_code, 400)
+
+if __name__ == '__main__':
+    unittest.main()
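The new `model/` modules each expose a `request_inference_from_*` coroutine with the same `(model, previous_result)` shape: `request_inference_from_seed_detector` returns a dict holding `result_json` and the sliced `images`, and `request_inference_from_swin` consumes that dict and merges the topN classifications back into the boxes. The sketch below shows how such a two-step chain could be driven; it is not part of this changeset, and the `Model` namedtuple, the `run_two_step_pipeline` helper, and the import path of the seed detector module are illustrative assumptions.

```python
# Minimal sketch of chaining the new model functions (assumptions: the seed
# detector module path and the Model namedtuple are hypothetical; the fields
# mirror the attributes the request functions read).
import asyncio
from collections import namedtuple

from model.seed_detector import request_inference_from_seed_detector  # hypothetical path
from model.swin import request_inference_from_swin

Model = namedtuple(
    "Model",
    ["name", "endpoint", "api_key", "content_type", "deployment_platform"],
)


async def run_two_step_pipeline(image_b64: str, detector: Model, classifier: Model) -> list:
    # Step 1: detect seeds and slice the source image into one crop per bounding box.
    detection = await request_inference_from_seed_detector(detector, image_b64)
    # Step 2: classify each crop with swin and merge the topN results back into the boxes.
    return await request_inference_from_swin(classifier, detection)
```

Because `model/test.py` follows the same signature, the unit tests above can exercise the `/inf` route through the test pipeline without reaching a real endpoint.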