From 63dddec39ec6de6190e7883d142b08c7442d3615 Mon Sep 17 00:00:00 2001 From: Maxence Guindon Date: Mon, 26 Feb 2024 16:48:19 +0000 Subject: [PATCH] fixes #51: add function to retrieve pipeline info from blob storage --- app.py | 18 +++--- azure_storage/azure_storage_api.py | 35 ++++++++++++ custom_exceptions.py | 3 + docs/nachet-model-documentation.md | 92 +++++++++++++++--------------- 4 files changed, 96 insertions(+), 52 deletions(-) diff --git a/app.py b/app.py index 96609d81..2f36a4aa 100644 --- a/app.py +++ b/app.py @@ -325,7 +325,10 @@ async def fetch_json(repo_URL, key, file_path, mock=False): if mock: with open("mock_pipeline_json.json", "r+") as f: result_json = json.load(f) - + else: + # TO DO: call the blob storage to get the file + result_json = await azure_storage_api.get_pipeline_info(connection_string, "user-bab1da84-5937-4016-965e-67e1ea6e29c4", "0.1.0") + api_call_function = {func.split("from_")[1]: getattr(model_module, func) for func in dir(model_module) if "inference" in func.split("_")} inference_functions = {func: getattr(inference, func) for func in dir(inference) if "process" in func.split("_")} models = () @@ -347,10 +350,9 @@ async def fetch_json(repo_URL, key, file_path, mock=False): return result_json.get("pipelines") except urllib.error.HTTPError as error: - return jsonify({"error": f"Failed to retrieve the JSON. 
\ - HTTP Status Code: {error.code}"}), 400 + raise ValueError(str(error)) except Exception as e: - return jsonify({"error": str(e)}), 500 + raise ValueError(str(e)) async def data_factory(**kwargs): @@ -364,9 +366,11 @@ async def before_serving(): try: # Get all the inference functions from the model_module and map them in a dictionary CACHE["seeds"] = await fetch_json(NACHET_DATA, "seeds", "seeds/all.json") - CACHE["endpoints"] = await fetch_json(NACHET_MODEL, "endpoints", "model_endpoints_metadata.json", mock=True) - except: - raise ValueError("Failed to load the JSON document.") + CACHE["endpoints"] = await fetch_json(NACHET_MODEL, "endpoints", "model_endpoints_metadata.json") #, mock=True) + print(CACHE["endpoints"]) + except Exception as e: + print(e) + raise ServerError("Failed to retrieve data from the repository") if __name__ == "__main__": app.run(debug=True, host="0.0.0.0", port=8080) diff --git a/azure_storage/azure_storage_api.py b/azure_storage/azure_storage_api.py index 2d27a3d9..6c93a9a4 100644 --- a/azure_storage/azure_storage_api.py +++ b/azure_storage/azure_storage_api.py @@ -13,6 +13,7 @@ FolderListError, GenerateHashError, CreateDirectoryError, + PipelineNotFoundError, ) """ @@ -234,3 +235,37 @@ async def get_directories(container_client): except FolderListError as error: print(error) return [] + +async def get_pipeline_info( + connection_string: str, + pipeline_container_name: str, + pipeline_version: str + ) -> json: + try: + blob_service_client = BlobServiceClient.from_connection_string( + connection_string + ) + + if blob_service_client: + container_client = blob_service_client.get_container_client( + pipeline_container_name + ) + + blob_list = container_client.list_blobs() + for blob in blob_list: + if blob.name.split(".")[-1] != "json": + print("WARNING a non JSON file is in the folder") + else: + json_blob = await get_blob(container_client, blob.name) + if json_blob: + pipeline = json.loads(json_blob) + if pipeline.get("version") == 
pipeline_version: + return pipeline + else: + raise PipelineNotFoundError( + "This version of the pipeline was not found." + ) + + except FolderListError as error: + print(error) + return False diff --git a/custom_exceptions.py b/custom_exceptions.py index 7de693e7..b026a687 100644 --- a/custom_exceptions.py +++ b/custom_exceptions.py @@ -64,3 +64,6 @@ class ValidateEnvVariablesError(Exception): class ServerError(Exception): pass + +class PipelineNotFoundError(Exception): + pass diff --git a/docs/nachet-model-documentation.md b/docs/nachet-model-documentation.md index 88515873..e872c0cc 100644 --- a/docs/nachet-model-documentation.md +++ b/docs/nachet-model-documentation.md @@ -32,43 +32,47 @@ Nachet Interactive models' perform the following tasks: ## Return value of models ```json -result_json = { - 'filename': 'tmp/tmp_file_name', //depending on the model but should be standard - 'boxes': [ - {'box': { - 'topX': 0.0, - 'topY: 0.0, - 'bottomX': 0.0, - 'bottomY.: 0.0 - }, // The data to draw the box around the seed. 
- 'label': 'label_name', // Top label - 'score': 0.999 // Top score - 'topResult': [ +{ + "filename": "tmp/tmp_file_name", + "boxes": [ + {"box": { + "topX": 0.0, + "topY": 0.0, + "bottomX": 0.0, + "bottomY": 0.0 + }, + "label": "top_label_name", + "score": 0.912, + "topResult": [ { - 'score': 0.999 - 'label': seed_name, + "score": 0.912, + "label": "top_label_name" }, { - 'score': 0.999 - 'label': seed_name, + "score": 0.053, + "label": "seed_name" }, { - 'score': 0.999 - 'label': seed_name, + "score": 0.0029, + "label": "seed_name" }, { - 'score': 0.999 - 'label': seed_name, + "score": 0.005, + "label": "seed_name" }, { - 'score': 0.999 - 'label': seed_name, + "score": 0.001, + "label": "seed_name" } ], - 'overlapping': false //or true - 'overlappingIndices': 0 // The index of the overlapping box - } + "overlapping": false, + "overlappingIndices": 0 + } ], + "labelOccurrence": { + "seed_name": 1 + }, + "totalBoxes": 1 } ``` @@ -126,29 +130,29 @@ A list of common error models returns to the backend. ## Pipeline and model data In order to dynamically build the pipeline in the backend from the model, the -following data structure was designed. +following data structure was designed. For now, the pipelines will have two keys for their names (`model_name`, `pipeline_name`) to support the frontend code until it is changed to get the name of the pipeline with the correct key. 
```json -// Pipelines { + "version": "0.1.0", + "date": "2024-02-26", + "pipelines": [ { - "endpoint_name": ["seed-detector", "swin-endpoint"], - "piepline_name": "Swin transformer", - "created_by": "Amir Ardalan Kalantari Dehaghi", - "creation_date": "2023-12-01", + "models": ["model 1", "model 2"], + "model_name": "Model(Pipeline) 1", + "pipeline_name": "Pipeline 1", + "created_by": "creator name", + "creation_date": "2024-01-01", "version": "1", "description": "", "job_name": "", "dataset": "", "metrics": [], "identifiable": [] - }, - ] -} - -// Models -{ + } + ], + "models": [ { "task": "object-detection", @@ -158,19 +162,17 @@ following data structure was designed. "infeference functions": "function_key", "content-type": "application/json", "deployment_platform": {"azure": "azureml-model-deployment"}, - // To front-end - "endpoint_name": "nachet-6seeds", - "model_name": "14of15Seeds_6SEEDSMag", - "created_by": "Amir Ardalan Kalantari Dehaghi", - "creation_date": "2023-04-27", + "endpoint_name": "endpoint-name", + "model_name": "model name", + "created_by": "creator name", + "creation_date": "2024-01-01", "version": "1", - "description": "trained using 6 seed images per image of 14of15 tagarno", - "job_name": "neat_cartoon_k0y4m0vz", + "description": "", + "job_name": "", "dataset": "", "metrics": [], "identifiable": [] } - //... ] } ```