Skip to content

Commit

Permalink
fixes #51: add function to retrieve pipeline info from blob storage
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxence Guindon committed Feb 26, 2024
1 parent 3e0ca86 commit 63dddec
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 52 deletions.
18 changes: 11 additions & 7 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,10 @@ async def fetch_json(repo_URL, key, file_path, mock=False):
if mock:
with open("mock_pipeline_json.json", "r+") as f:
result_json = json.load(f)

else:
# TO DO: call the blob storage to get the file
result_json = await azure_storage_api.get_pipeline_info(connection_string, "user-bab1da84-5937-4016-965e-67e1ea6e29c4", "0.1.0")

api_call_function = {func.split("from_")[1]: getattr(model_module, func) for func in dir(model_module) if "inference" in func.split("_")}
inference_functions = {func: getattr(inference, func) for func in dir(inference) if "process" in func.split("_")}
models = ()
Expand All @@ -347,10 +350,9 @@ async def fetch_json(repo_URL, key, file_path, mock=False):
return result_json.get("pipelines")

except urllib.error.HTTPError as error:
return jsonify({"error": f"Failed to retrieve the JSON. \
HTTP Status Code: {error.code}"}), 400
raise ValueError(str(error))
except Exception as e:
return jsonify({"error": str(e)}), 500
raise ValueError(str(e))


async def data_factory(**kwargs):
Expand All @@ -364,9 +366,11 @@ async def before_serving():
try:
# Get all the inference functions from the model_module and map them in a dictionary
CACHE["seeds"] = await fetch_json(NACHET_DATA, "seeds", "seeds/all.json")
CACHE["endpoints"] = await fetch_json(NACHET_MODEL, "endpoints", "model_endpoints_metadata.json", mock=True)
except:
raise ValueError("Failed to load the JSON document.")
CACHE["endpoints"] = await fetch_json(NACHET_MODEL, "endpoints", "model_endpoints_metadata.json") #, mock=True)
print(CACHE["endpoints"])
except Exception as e:
print(e)
raise ServerError("Failed to retrieve data from the repository")

if __name__ == "__main__":
app.run(debug=True, host="0.0.0.0", port=8080)
Expand Down
35 changes: 35 additions & 0 deletions azure_storage/azure_storage_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
FolderListError,
GenerateHashError,
CreateDirectoryError,
PipelineNotFoundError,
)

"""
Expand Down Expand Up @@ -234,3 +235,37 @@ async def get_directories(container_client):
except FolderListError as error:
print(error)
return []

async def get_pipeline_info(
    connection_string: str,
    pipeline_container_name: str,
    pipeline_version: str
) -> dict:
    """
    Retrieve the pipeline document with the requested version from blob storage.

    Scans every blob in the given container, skipping non-JSON files, and
    returns the first JSON document whose "version" field equals
    pipeline_version.

    Parameters:
        connection_string: Azure storage account connection string.
        pipeline_container_name: name of the container holding pipeline JSON files.
        pipeline_version: version string to match against each document's "version" key.

    Returns:
        dict: the parsed pipeline document, or False if listing the container fails.

    Raises:
        PipelineNotFoundError: when no blob in the container matches the
            requested version (raised only after ALL blobs have been checked).
    """
    try:
        blob_service_client = BlobServiceClient.from_connection_string(
            connection_string
        )
        # from_connection_string raises on a malformed string; no need to
        # truth-test the returned client.
        container_client = blob_service_client.get_container_client(
            pipeline_container_name
        )

        for blob in container_client.list_blobs():
            # Only JSON documents can contain pipeline definitions.
            if not blob.name.endswith(".json"):
                print("WARNING a non JSON file is in the folder")
                continue
            json_blob = await get_blob(container_client, blob.name)
            if json_blob:
                pipeline = json.loads(json_blob)
                if pipeline.get("version") == pipeline_version:
                    return pipeline

        # Raise only after every blob has been inspected — previously the
        # first non-matching document aborted the whole search.
        raise PipelineNotFoundError(
            "This version of the pipeline was not found."
        )

    except FolderListError as error:
        print(error)
        return False
3 changes: 3 additions & 0 deletions custom_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,6 @@ class ValidateEnvVariablesError(Exception):

class ServerError(Exception):
    """Raised when the backend fails to retrieve data it needs at startup
    (e.g. endpoint/seed metadata from the repository)."""
    pass

class PipelineNotFoundError(Exception):
    """Raised when no pipeline document matching the requested version
    is found in blob storage."""
    pass
92 changes: 47 additions & 45 deletions docs/nachet-model-documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,43 +32,47 @@ Nachet Interactive models' perform the following tasks:
## Return value of models

```json
result_json = {
'filename': 'tmp/tmp_file_name', //depending on the model but should be standard
'boxes': [
{'box': {
'topX': 0.0,
'topY: 0.0,
'bottomX': 0.0,
'bottomY.: 0.0
}, // The data to draw the box around the seed.
'label': 'label_name', // Top label
'score': 0.999 // Top score
'topResult': [
{
"filename": "tmp/tmp_file_name",
"boxes": [
{"box": {
"topX": 0.0,
"topY": 0.0,
"bottomX": 0.0,
"bottomY": 0.0
},
"label": "top_label_name",
"score": 0.912,
"topResult": [
{
'score': 0.999
'label': seed_name,
"score": 0.912
"label": "top_label_name",
},
{
'score': 0.999
'label': seed_name,
"score": 0.053
"label": "seed_name",
},
{
'score': 0.999
'label': seed_name,
"score": 0.0029
"label": "seed_name",
},
{
'score': 0.999
'label': seed_name,
"score": 0.005
"label": "seed_name",
},
{
'score': 0.999
'label': seed_name,
"score": 0.001
"label": "seed_name",
}
],
'overlapping': false //or true
'overlappingIndices': 0 // The index of the overlapping box
}
"overlapping": false,
"overlappingIndices": 0
},
],
"labelOccurrence": {
"seed_name": 1,
},
"totalBoxes": 1
}
```

Expand Down Expand Up @@ -126,29 +130,29 @@ A list of common error models returns to the backend.
## Pipeline and model data

In order to dynamically build the pipeline in the backend from the model, the
following data structure was designed.
following data structure was designed. For now, each pipeline carries two keys for its name (`model_name` and `pipeline_name`) to support the frontend code until it is updated to read the pipeline name from the correct key.

```json
// Pipelines
{
"version": "0.1.0",
"date": "2024-02-26",
"pipelines":
[
{
"endpoint_name": ["seed-detector", "swin-endpoint"],
"piepline_name": "Swin transformer",
"created_by": "Amir Ardalan Kalantari Dehaghi",
"creation_date": "2023-12-01",
"models": ["model 1", "model 2"],
"model_name": "Model(Pipeline) 1",
"pipeline_name": "Pipeline 1",
"created_by": "creator name",
"creation_date": "2024-01-01",
"version": "1",
"description": "",
"job_name": "",
"dataset": "",
"metrics": [],
"identifiable": []
},
]
}

// Models
{
}
],
"models":
[
{
"task": "object-detection",
Expand All @@ -158,19 +162,17 @@ following data structure was designed.
"infeference functions": "function_key",
"content-type": "application/json",
"deployment_platform": {"azure": "azureml-model-deployment"},
// To front-end
"endpoint_name": "nachet-6seeds",
"model_name": "14of15Seeds_6SEEDSMag",
"created_by": "Amir Ardalan Kalantari Dehaghi",
"creation_date": "2023-04-27",
"endpoint_name": "endpoint-name",
"model_name": "model name",
"created_by": "creator name",
"creation_date": "2024-01-01",
"version": "1",
"description": "trained using 6 seed images per image of 14of15 tagarno",
"job_name": "neat_cartoon_k0y4m0vz",
"description": "",
"job_name": "",
"dataset": "",
"metrics": [],
"identifiable": []
}
//...
]
}
```

0 comments on commit 63dddec

Please sign in to comment.