diff --git a/.env.template b/.env.template index fdfa139..f03f019 100644 --- a/.env.template +++ b/.env.template @@ -1,8 +1,13 @@ NACHET_AZURE_STORAGE_CONNECTION_STRING= +NACHET_STORAGE_URL= +NACHET_DB_URL= +NACHET_SCHEMA= NACHET_DATA= NACHET_BLOB_PIPELINE_NAME= NACHET_BLOB_PIPELINE_VERSION= NACHET_BLOB_PIPELINE_DECRYPTION_KEY= +NACHET_BLOB_ACCOUNT= +NACHET_BLOB_KEY= NACHET_MAX_CONTENT_LENGTH= NACHET_VALID_EXTENSION= NACHET_VALID_DIMENSION= diff --git a/README.md b/README.md index 8cd3534..56379b6 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ If you want to run the program as a Docker container (e.g., for production), use ```bash docker build -t nachet-backend . -docker run -p 8080:8080 -v $(pwd):/app nachet-backend +docker run -p 8080:8080 -e PORT=8080 -v $(pwd):/app nachet-backend ``` ### TESTING NACHET-BACKEND diff --git a/app.py b/app.py index 5a6646f..9d05c59 100644 --- a/app.py +++ b/app.py @@ -16,10 +16,14 @@ from collections import namedtuple from cryptography.fernet import Fernet -import azure_storage.azure_storage_api as azure_storage_api -from azure.core.exceptions import ResourceNotFoundError, ServiceResponseError -import model.inference as inference -from model import request_function +load_dotenv() # noqa: E402 + +import model.inference as inference # noqa: E402 +import storage.datastore_storage_api as datastore # noqa: E402 +from azure.core.exceptions import ResourceNotFoundError, ServiceResponseError # noqa: E402 +from model import request_function # noqa: E402 +from datastore import azure_storage # noqa: E402 + class APIErrors(Exception): pass @@ -49,6 +53,18 @@ class ImageValidationError(APIErrors): pass +class ValidateEnvVariablesError(APIErrors): + pass + + +class EmailNotSendError(APIErrors): + pass + + +class EmptyPictureSetError(APIErrors): + pass + + class APIWarnings(UserWarning): pass @@ -60,7 +76,6 @@ class ImageWarning(APIWarnings): class MaxContentLengthWarning(APIWarnings): pass -load_dotenv() connection_string_regex = 
r"^DefaultEndpointsProtocol=https?;.*;FileEndpoint=https://[a-zA-Z0-9]+\.file\.core\.windows\.net/;$" pipeline_version_regex = r"\d.\d.\d" @@ -149,12 +164,10 @@ async def before_serving(): if not bool(re.match(pipeline_version_regex, PIPELINE_VERSION)): raise ServerError("Incorrect environment variable: PIPELINE_VERSION") - CACHE["seeds"] = await fetch_json(NACHET_DATA, "seeds", "seeds/all.json") - CACHE["endpoints"] = await get_pipelines( - CONNECTION_STRING, PIPELINE_BLOB_NAME, - PIPELINE_VERSION, Fernet(FERNET_KEY) - ) - + # Store the seeds names and ml structure in CACHE + CACHE["seeds"] = datastore.get_all_seeds_names() + CACHE["endpoints"] = await get_pipelines() + print( f"""Server start with current configuration:\n date: {date.today()} @@ -168,6 +181,23 @@ async def before_serving(): raise +@app.get("/get-user-id") +async def get_user_id() : + """ + Returns the user id + """ + try: + data = await request.get_json() + email = data["email"] + + user_id = datastore.get_user_id(email) + + return jsonify(user_id), 200 + except (KeyError, TypeError, ValueError, datastore.DatastoreError) as error: + print(error) + return jsonify([f"GetUserIdError: {str(error)}"]), 400 + + @app.post("/del") async def delete_directory(): """ @@ -178,11 +208,11 @@ async def delete_directory(): container_name = data["container_name"] folder_name = data["folder_name"] if container_name and folder_name: - container_client = await azure_storage_api.mount_container( - app.config["BLOB_CLIENT"], container_name, create_container=False + container_client = await azure_storage.mount_container( + CONNECTION_STRING, container_name, create_container=True ) if container_client: - folder_uuid = await azure_storage_api.get_folder_uuid( + folder_uuid = await azure_storage.get_folder_uuid( container_client, folder_name ) if folder_uuid: @@ -198,7 +228,7 @@ async def delete_directory(): else: raise DeleteDirectoryRequestError("missing container or directory name") - except (KeyError, TypeError, 
azure_storage_api.MountContainerError, ResourceNotFoundError, DeleteDirectoryRequestError, ServiceResponseError) as error: + except (KeyError, TypeError, azure_storage.MountContainerError, ResourceNotFoundError, DeleteDirectoryRequestError, ServiceResponseError) as error: print(error) return jsonify([f"DeleteDirectoryRequestError: {str(error)}"]), 400 @@ -212,15 +242,15 @@ async def list_directories(): data = await request.get_json() container_name = data["container_name"] if container_name: - container_client = await azure_storage_api.mount_container( - app.config["BLOB_CLIENT"], container_name, create_container=True + container_client = await azure_storage.mount_container( + CONNECTION_STRING, container_name, create_container=True ) - response = await azure_storage_api.get_directories(container_client) + response = await azure_storage.get_directories(container_client) return jsonify(response), 200 else: raise ListDirectoriesRequestError("Missing container name") - except (KeyError, TypeError, ListDirectoriesRequestError, azure_storage_api.MountContainerError) as error: + except (KeyError, TypeError, ListDirectoriesRequestError, azure_storage.MountContainerError) as error: print(error) return jsonify([f"ListDirectoriesRequestError: {str(error)}"]), 400 @@ -235,10 +265,10 @@ async def create_directory(): container_name = data["container_name"] folder_name = data["folder_name"] if container_name and folder_name: - container_client = await azure_storage_api.mount_container( - app.config["BLOB_CLIENT"], container_name, create_container=False + container_client = await azure_storage.mount_container( + CONNECTION_STRING, container_name, create_container=True ) - response = await azure_storage_api.create_folder( + response = await azure_storage.create_folder( container_client, folder_name ) if response: @@ -248,7 +278,7 @@ async def create_directory(): else: raise CreateDirectoryRequestError("missing container or directory name") - except (KeyError, TypeError, 
CreateDirectoryRequestError, azure_storage_api.MountContainerError) as error: + except (KeyError, TypeError, CreateDirectoryRequestError, azure_storage.MountContainerError) as error: print(error) return jsonify([f"CreateDirectoryRequestError: {str(error)}"]), 400 @@ -298,7 +328,7 @@ async def image_validation(): if header.lower() != expected_header: raise ImageValidationError(f"invalid file header: {header}") - validator = await azure_storage_api.generate_hash(image_bytes) + validator = await azure_storage.generate_hash(image_bytes) CACHE['validators'].append(validator) return jsonify([validator]), 200 @@ -325,13 +355,13 @@ async def inference_request(): container_name = data["container_name"] imageDims = data["imageDims"] image_base64 = data["image"] - + user_id = data["userId"] + area_ratio = data.get("area_ratio", 0.5) color_format = data.get("color_format", "hex") print(f"Requested by user: {container_name}") # TODO: Transform into logging pipelines_endpoints = CACHE.get("pipelines") - blob_service_client = app.config.get("BLOB_CLIENT") validators = CACHE.get("validators") if not (folder_name and container_name and imageDims and image_base64): @@ -352,14 +382,21 @@ async def inference_request(): cache_json_result = [encoded_data] image_bytes = base64.b64decode(encoded_data) - container_client = await azure_storage_api.mount_container( - blob_service_client, container_name, create_container=True + container_client = await azure_storage.mount_container( + CONNECTION_STRING, container_name, create_container=True ) - hash_value = await azure_storage_api.generate_hash(image_bytes) - await azure_storage_api.upload_image( - container_client, folder_name, image_bytes, hash_value + + # Open db connection + connection = datastore.get_connection() + cursor = datastore.get_cursor(connection) + + image_hash_value = await azure_storage.generate_hash(image_bytes) + picture_id = await datastore.get_picture_id( + cursor, user_id, image_hash_value, container_client ) - + # Close 
connection + datastore.end_query(connection, cursor) + pipeline = pipelines_endpoints.get(pipeline_name) for idx, model in enumerate(pipeline): @@ -378,21 +415,30 @@ async def inference_request(): # upload the inference results to the user's container as async task app.add_background_task( - azure_storage_api.upload_inference_result, + azure_storage.upload_inference_result, container_client, folder_name, result_json_string, - hash_value, + image_hash_value, ) + + # Open db connection + connection = datastore.get_connection() + cursor = datastore.get_cursor(connection) + + saved_result_json = await datastore.save_inference_result(cursor, user_id, processed_result_json[0], picture_id, pipeline_name, 1) + + # Close connection + datastore.end_query(connection, cursor) + # return the inference results to the client print(f"Took: {'{:10.4f}'.format(time.perf_counter() - seconds)} seconds") # TODO: Transform into logging - return jsonify(processed_result_json), 200 + return jsonify(saved_result_json), 200 - except (inference.ModelAPIErrors, KeyError, TypeError, ValueError, InferenceRequestError, azure_storage_api.MountContainerError) as error: + except (inference.ModelAPIErrors, KeyError, TypeError, ValueError, InferenceRequestError, azure_storage.MountContainerError) as error: print(error) return jsonify(["InferenceRequestError: " + error.args[0]]), 400 - @app.get("/seed-data/") async def get_seed_data(seed_name): """ @@ -428,6 +474,69 @@ async def get_model_endpoints_metadata(): return jsonify("Error retrieving model endpoints metadata.", 404) +@app.get("/seeds") +async def get_seeds(): + """ + Returns JSON containing the model seeds metadata + """ + seeds = await datastore.get_all_seeds() + if seeds : + return jsonify(seeds), 200 + else: + return jsonify("Error retrieving seeds", 404) + + +@app.post("/feedback-positive") +async def feedback_positive(): + """ + Receives inference feedback from the user and stores it in the database. 
+ --> Perfect Inference Feedback : + - send the user_id and the inference_id to the datastore so the inference will be verified and not modified + Params : + - user_id : the user id that sent the feedback + - inference_id : the inference id that the user wants to modify + - boxes_id : the boxes id that the user wants to modify + """ + try: + data = await request.get_json() + user_id = data["userId"] + inference_id = data["inferenceId"] + boxes_id = data["boxes"][0] + if inference_id and user_id and boxes_id: + await datastore.save_perfect_feedback(inference_id, user_id, boxes_id) + return jsonify([True]), 200 + else: + raise APIErrors("missing argument(s)") + except (KeyError, TypeError, APIErrors) as error: + return jsonify([f"APIErrors while sending the inference feedback: {str(error)}"]), 400 + +@app.post("/feedback-negative") +async def feedback_negative(): + """ + Receives inference feedback from the user and stores it in the database. + --> Annotated Inference Feedback : + - send the user_id and the inference_id to the datastore so the inference will be verified + - also send the feedback to the datastore to modify the inference + + Params : + - inference_feedback : correction of the inference from the user if not a perfect inference + - user_id : the user id that sent the feedback + - inference_id : the inference id that the user wants to modify + - boxes_id : the boxes id that the user wants to modify + """ + try: + data = await request.get_json() + inference_feedback = data["inferenceFeedback"] + user_id = data["userId"] + inference_id = data["inferenceId"] + boxes_id = data["boxes"][0] + if inference_id and user_id and boxes_id and inference_feedback : + await datastore.save_annoted_feedback(inference_id, user_id, boxes_id, inference_feedback) + else: + raise APIErrors("missing argument(s)") + except (KeyError, TypeError, APIErrors) as error: + return jsonify([f"APIErrors while sending the inference feedback: {str(error)}"]), 400 + @app.get("/health") async 
def health(): return "ok", 200 @@ -457,13 +566,12 @@ async def test(): return CACHE["endpoints"], 200 - + async def record_model(pipeline: namedtuple, result: list): new_entry = [{"name": model.name, "version": model.version} for model in pipeline] result[0]["models"] = new_entry return json.dumps(result, indent=4) - async def fetch_json(repo_URL, key, file_path): """ Fetches JSON document from a GitHub repository. @@ -484,24 +592,19 @@ async def fetch_json(repo_URL, key, file_path): return result_json -async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version, cipher_suite): +async def get_pipelines(cipher_suite=Fernet(FERNET_KEY)): """ Retrieves the pipelines from the Azure storage API. Returns: - list: A list of dictionaries representing the pipelines. """ - try: - app.config["BLOB_CLIENT"] = await azure_storage_api.get_blob_client(connection_string) - result_json = await azure_storage_api.get_pipeline_info(app.config["BLOB_CLIENT"], pipeline_blob_name, pipeline_version) - except (azure_storage_api.AzureAPIErrors) as error: - print(error) - raise ServerError("server errror: could not retrieve the pipelines") from error + result_json = await datastore.get_pipelines() models = () for model in result_json.get("models"): m = Model( - request_function.get(model.get("endpoint_name")), + request_function.get(model.get("model_name")), model.get("model_name"), model.get("version"), # To protect sensible data (API key and model endpoint), we encrypt it when diff --git a/docs/nachet-batch-import-documentation.md b/docs/nachet-batch-import-documentation.md new file mode 100644 index 0000000..a3d08a3 --- /dev/null +++ b/docs/nachet-batch-import-documentation.md @@ -0,0 +1,62 @@ +# Import Folder Images + +## Executive summary + +With the development of the datastore for Nachet, new opportunities arise. One +of them is to build a functionality to allow our trusted users to perform a batch +import of images into the database. 
With the introduction of this new feature, +users can now import an entire image folder at once, drastically reducing the +time and effort required. + +Previously, users had to manually upload images into the blob storage, which was +a time-consuming process, especially when dealing with large volumes of data. +With the introduction of this feature, users will be able to import images for +AI training with Nachet directly, which not only simplifies the image import process but +also enhances the system’s overall efficiency and usability. + +## Prerequisites + +- The backend needs to have a connection with the datastore + +## Sequence Diagram + +```mermaid +sequenceDiagram; + title: Batch Image Import 1.0.0 + autonumber + actor User + participant Frontend + participant Backend + participant Datastore + + User ->>+Frontend: Upload session request + Frontend->>+Backend: HTTP Post Req. + Backend->>+Datastore: get_all_seeds_names(cursor) + Datastore-->>-Backend: seed_names res. + Backend-->>-Frontend: seedNames res. + Frontend -->>-User: Show session form + User -) User: Fill form :
Seed selection, nb Seeds/Pic, Zoom + User -)+Frontend: Upload: session folder + Frontend ->>+Backend: HTTP Post Req. + Backend->>+Datastore: is_user_registered(cursor, email) + Datastore-->>-Backend: user_id res. + Backend -)Datastore: upload_picture_set (cursor, pictures, user_id, **data) + Note over Backend, Datastore: data contains at least the
following value: seed_name, zoom_level, nb_seeds +``` + +The complete diagram is part of the datastore documentation. You can see it +here: + +[Trusted user upload process](https://github.com/ai-cfia/nachet-datastore/blob/issue13-create-process-to-upload-metadata-for-trusted-users/doc/trusted-user-upload.md) + +### API Route + +#### /picture-form + +The `picture-form` is the route to call to get all the information needed for +the frontend to build the form to upload the pictures to the database. + +#### /upload-pictures + +The `/upload-pictures` route is the API endpoint responsible to assure the transit +of the picture to the database. diff --git a/docs/nachet-inference-documentation.md b/docs/nachet-inference-documentation.md index 3bed546..d0f3b23 100644 --- a/docs/nachet-inference-documentation.md +++ b/docs/nachet-inference-documentation.md @@ -65,31 +65,34 @@ sequenceDiagram actor Client participant Frontend participant Backend + participant Datastore participant Blob storage participant Model Backend-)+Backend: run() - Note over Backend,Blob storage: initialisation - Backend-)Backend: before_serving() - Backend-)Backend: get_pipelines() - alt - Backend-)+Blob storage: HTTP POST req. - Blob storage--)-Backend: return pipelines_models.json - else - Backend-)Frontend: error 500 Failed to retrieve data from the repository - end - Note over Backend,Blob storage: end of initialisation + Note over Backend,Datastore: initialisation + Backend-)+Backend: before_serving() + Backend-)+Datastore: get_all_seeds_names() + Datastore--)-Backend: return seeds.json + Backend-)+Datastore: get_pipelines() + Datastore--)-Backend: return pipelines_models.json + + Note over Backend,Datastore: end of initialisation Client->>+Frontend: applicationStart() - Frontend-)Backend: HTTP POST req. + Frontend-)Backend: HTTP GET req. "/model-endpoints-metadata" Backend-)Backend: get_model_endpoints_metadata() Backend--)Frontend: Pipelines names res. 
Note left of Backend: return pipelines names and metadata + Frontend-)Backend: HTTP GET req. "/get-user-id" + Backend-)Backend: get_user_id() + Backend--)Frontend: user_id + Note left of Backend: return user_id from given email Frontend->>Client: application is ready Client-->>Frontend: client ask action from specific pipeline - Frontend-)Backend: HTTP POST req. - Backend-)Backend: inference_request(pipeline_name, folder_name, container_name, imageDims, image) + Frontend-)Backend: HTTP POST req. "/inf" + Backend-)Backend: inference_request(model_name, validator, folder_name, container_name, imageDims, image, userId) alt missing arguments Backend-)Frontend: Error 400 missing arguments end @@ -100,23 +103,31 @@ sequenceDiagram Backend-)Frontend: Error 400 wrong header on file end - Backend-)Backend: mount_container(connection_string(Environnement Variable, container_name)) - Backend-)+Blob storage: HTTP POST req. - Blob storage--)-Backend: container_client + Backend-)+Datastore: mount_container(connection_string(Environnement Variable, container_name)) + Datastore-)+Blob storage: HTTP POST req. + Blob storage--)-Datastore: container_client + Datastore--)-Backend: container_client + + Note over Backend,Datastore: Open db connection + Backend-)+Datastore: get_connection() + Datastore--)-Backend: connection + Backend-)+Datastore: get_cursor(connection) + Datastore--)-Backend: cursor - Backend-)Backend: Generate Hash(image_bytes) + Note over Backend,Datastore: Send picture to the datastore to upload it + Backend-)Datastore: Generate Hash(image_bytes) - Backend-)Backend: upload_image(container_client, folder_name, image_bytes, hash_value) - Backend-)+Blob storage: HTTP POST req. - Blob storage--)-Backend: blob_name + Backend-)+Datastore: upload_picture(cursor, user_id, image_hash_value, container_client) + Datastore-)+Blob storage: HTTP POST req. 
+ Blob storage--)-Datastore: picture_id + Datastore--)-Backend: picture_id - Backend-)Backend: get_blob(container_client, blob_name) - Backend-)+Blob storage: HTTP POST req. - Blob storage--)-Backend: blob + Note over Backend,Datastore:Commit query and close db connection + Backend-)Datastore: end_query() loop for every model in pipeline - Backend-)Backend: model.entry_function(model, previous_result) - note over Backend, Blob storage: Every model has is own entry_function + Backend-)Backend: model.request_function(model, previous_result) + note over Backend, Blob storage: Every model has is own request_function Backend-)Backend: request_factory(previous_result, model) Backend-)Backend: urllib.request.Request(endpoint_url, body, header) Backend-)+Model: HTTP POST req. @@ -126,15 +137,27 @@ sequenceDiagram end end note over Backend, Blob storage: End of the loop - par Backend to Frontend - Backend-)Backend: inference.process_inference_results(result_json, imageDims) - Backend--)Frontend: Processed result res. - Frontend--)-Client: display result - and Backend to Blob storage - note over Backend, Blob storage: record the result produced by the model - Backend-)Backend: upload_inference_result(container_client, folder_name, result_json_string, hash_value) - Backend-)-Blob storage: HTTP POST req. - end + + Backend-)Backend: inference.process_inference_results(result_json, imageDims) + + note over Backend, Blob storage: record the result produced by the model + Backend-)Backend: upload_inference_result(container_client, folder_name, result_json_string, hash_value) + Backend-)-Blob storage: HTTP POST req. 
+ + Note over Backend,Datastore: Open db connection + Backend-)+Datastore: get_connection() + Datastore--)-Backend: connection + Backend-)+Datastore: get_cursor(connection) + Datastore--)-Backend: cursor + note over Backend, Datastore : send inference result to datastore to save it + Backend-)+Datastore : register_inference_result(cursor, user_id,processed_result_json, picture_id, pipeline_name) + Datastore--)-Backend :save_result_json + Note over Backend,Datastore:Commit query and close db connection + Backend-)Datastore: end_query() + + Backend--)Frontend: Send the saved result + Frontend--)-Client: display result + ``` ![footer_for_diagram](https://github.com/ai-cfia/nachet-backend/assets/96267006/cf378d6f-5b20-4e1d-8665-2ba65ed54f8e) @@ -150,11 +173,14 @@ and if the image sent for analysis has a valid header. If all the above checks pass, the function initializes or finds the user blob container and uploads the image. Next, it requests an inference from every model -in the pipeline. Each model specifies their `entry_function` (how to call and +in the pipeline. Each model specifies their `request_function` (how to call and retrieve data) and whether they have a `process_inference` function. Based on these indications, the results are returned and stored in the cache. -If no other model is called, the last result is then processed and sent to the frontend. +If no other model is called, the last result is then processed and register by +the datastore. The inferences are saved so the users could give feedback for +training and statistics purposes. The inference result is then sent to the +frontend. 
### Input and Output for inference request @@ -167,6 +193,7 @@ The inference request will process the following parameters: |container_name | The user's container| |imageDims | The dimension of the image| |image | The image encoded in b64 (ASCII)| +|userId | The user's id in db Note that since the information is received from the frontend, the model_name is an abstraction for a pipeline. @@ -174,14 +201,18 @@ an abstraction for a pipeline. The inference request will return a list with the following information: |key parameters | hierarchy Levels | Return Value | |--|--|--| -|Filename| 0 | Contains the filename of the image| |Boxes | 0 | Contains all the boxes returned by the inference request| +|Filename| 0 | Contains the filename of the image| +|inference_id| 0 | Inference id after it has been saved in the database| |labelOccurence | 0 | Contains the number of label occurence| |totalBoxes | 0 | Boxes total number| +|models | 0 | Models of the pipeline| |Box | 1 | Contains all the information of one seed in the image| +|box_id | 1 | box id after it has been saved in the database| |label | 1 | Contains the top label for the seed| |score | 1 | Contains the top score for the seed| |topN | 1 | Contains the top N scores for the seed| +|top_id | 1 | id of the top result| |overlapping | 1 | Contains a boolean to tell if the box overlap with another one| |overlappingIndices | 1 | Contains the index of the overlapping box| |topX | 2 | The top x value of the box around a seed| @@ -189,7 +220,8 @@ The inference request will return a list with the following information: |bottomX | 2 | The bottom x value of the box around a seed| |bottomY| 2 | The bottom y value of the box around a seed| -*for more look at [nachet-model-documentation](https://github.com/ai-cfia/nachet-backend/blob/51-implementing-2-models/docs/nachet-model-documentation.md#return-value-of-models)* +*for more look at 
+[nachet-model-documentation](https://github.com/ai-cfia/nachet-backend/blob/51-implementing-2-models/docs/nachet-model-documentation.md#return-value-of-models)* **topN** contains the top 5 predictions of the models: @@ -197,22 +229,27 @@ The inference request will return a list with the following information: "topN": [ { "label": "seed_name", + "object_id":"xxxx-xxxx-xxxx", "score": 0.75 } { "label": "seed_name", + "object_id":"xxxx-xxxx-xxxx", "score": 0.18 } { "label": "seed_name", + "object_id":"xxxx-xxxx-xxxx", "score": 0.05 } { "label": "seed_name", + "object_id":"xxxx-xxxx-xxxx", "score": 0.019 } { "label": "seed_name", + "object_id":"xxxx-xxxx-xxxx", "score": 0.001 } ] @@ -221,9 +258,9 @@ The inference request will return a list with the following information: ### Blob storage and Pipeline versioning To keep track of the various pipeline iterations and versions, JSON files are -stored in the blob storage. Users can add the JSON to the blob storage -using the `pipelines_version_insertion.py` script. This allows for easy -management of model and pipeline history. +stored in the blob storage. Users can add the JSON to the blob storage using the +`pipelines_version_insertion.py` script. This allows for easy management of +model and pipeline history. To use the script, 3 environment variables are necessary: @@ -232,35 +269,35 @@ To use the script, 3 environment variables are necessary: * NACHET_BLOB_PIPELINE_VERSION * Containing the version the user wants to select * NACHET_BLOB_PIPELINE_DECRYPTION_KEY - * The key to decrypt sensible data such as the API key and the endpoint of a model. + * The key to decrypt sensible data such as the API key and the endpoint of a + model. #### In the code -In the backend, the pipelines are retrieved using the `get_pipelines` function. -This function retrieves the data from the blob storage and stores the pipeline in -the `CACHE["endpoint"]` variable. 
This is the variable that feeds the `models` -information and metadata to the frontend. +In the backend, the pipelines are retrieved using the `get_pipelines` function +which call the get_ml_structure of the datastore. This function retrieves the +data from the database. Then the pipelines are stored in the `CACHE["endpoint"]` +variable. This is the variable that feeds the `models` information and metadata +to the frontend. + +In the `app.py` ```python -async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version, cipher_suite): +async def get_pipelines(cipher_suite=Fernet(FERNET_KEY)): """ Retrieves the pipelines from the Azure storage API. Returns: - list: A list of dictionaries representing the pipelines. """ - try: - app.config["BLOB_CLIENT"] = await azure_storage_api.get_blob_client(connection_string) - result_json = await azure_storage_api.get_pipeline_info(app.config["BLOB_CLIENT"], pipeline_blob_name, pipeline_version) - except (azure_storage_api.AzureAPIErrors) as error: - print(error) - raise ServerError("server errror: could not retrieve the pipelines") from error + result_json = await datastore.get_pipelines() models = () for model in result_json.get("models"): m = Model( - request_function.get(model.get("api_call_function")), + request_function.get(model.get("model_name")), model.get("model_name"), + model.get("version"), # To protect sensible data (API key and model endpoint), we encrypt it when # it's pushed into the blob storage. 
Once we retrieve the data here in the # backend, we need to decrypt the byte format to recover the original @@ -278,6 +315,24 @@ async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version, return result_json.get("pipelines") ``` +Then in the datastore module that call the datastore repo + +```python +async def get_pipelines() -> list: + + """ + Retrieves the pipelines from the Datastore + """ + try: + connection = get_connection() + cursor = get_cursor(connection) + pipelines = await datastore.get_ml_structure(cursor) + return pipelines + except Exception as error: # TODO modify Exception for more specific exception + raise GetPipelinesError(error.args[0]) + +``` + ### Available Version of the JSON file |Version|Creation Date| Pipelines| diff --git a/model/__init__.py b/model/__init__.py index bdb979b..6debf77 100644 --- a/model/__init__.py +++ b/model/__init__.py @@ -4,8 +4,8 @@ from model.six_seeds import request_inference_from_nachet_6seeds request_function = { - "swin-endpoint": request_inference_from_swin, - "seed-detector": request_inference_from_seed_detector, + "swinv1-base-dataaugv2-1": request_inference_from_swin, + "seed-detector-1": request_inference_from_seed_detector, "test": request_inference_from_test, - "nachet-6seeds": request_inference_from_nachet_6seeds + "m-14of15seeds-6seedsmag": request_inference_from_nachet_6seeds } diff --git a/model/swin.py b/model/swin.py index 79561c7..3a51594 100644 --- a/model/swin.py +++ b/model/swin.py @@ -26,7 +26,10 @@ def process_swin_result(img_box:dict, results: dict) -> list: img_box[0]['boxes'][i]['label'] = result[0].get("label") img_box[0]['boxes'][i]['score'] = result[0].get("score") img_box[0]['boxes'][i]["topN"] = [d for d in result] - + + # Adding the "filename" field (mandatory) + img_box[0]['filename'] = "default_filename" + return img_box diff --git a/requirements.txt b/requirements.txt index 4f0ec0d..30070c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ 
cryptography pyyaml pydantic python-magic +nachet-datastore @git+https://github.com/ai-cfia/nachet-datastore.git@main diff --git a/azure_storage/__init__.py b/storage/__init__.py similarity index 100% rename from azure_storage/__init__.py rename to storage/__init__.py diff --git a/azure_storage/azure_storage_api.py b/storage/azure_storage_api.py similarity index 100% rename from azure_storage/azure_storage_api.py rename to storage/azure_storage_api.py diff --git a/storage/datastore_storage_api.py b/storage/datastore_storage_api.py new file mode 100644 index 0000000..a3274a9 --- /dev/null +++ b/storage/datastore_storage_api.py @@ -0,0 +1,126 @@ +""" +This module provides an abstraction to the nachet-datastore interface. +""" +import datastore +from datastore import db +from datastore import user as user_datastore +import datastore.bin.deployment_mass_import + +import datastore.bin.upload_picture_set +import datastore.db.queries.seed as seed_queries + +class DatastoreError(Exception): + pass + +class SeedNotFoundError(DatastoreError): + pass + +class GetPipelinesError(DatastoreError): + pass + +class UserNotFoundError(DatastoreError): + pass + +def get_connection() : + return db.connect_db() + +def get_cursor(connection): + return db.cursor(connection) + +def end_query(connection, cursor): + db.end_query(connection, cursor) + +def get_all_seeds() -> list: + + """ + Return all seeds registered in the Datastore. + """ + try: + connection = get_connection() + cursor = get_cursor(connection) + return datastore.get_seed_info(cursor) + except Exception as error: # TODO modify Exception for more specific exception + raise SeedNotFoundError(error.args[0]) + + +def get_all_seeds_names() -> list: + + """ + Return all seed names registered in the Datastore. 
+ """ + try: + connection = get_connection() + cursor = get_cursor(connection) + return seed_queries.get_all_seeds_names(cursor) + except Exception as error: # TODO modify Exception for more specific exception + raise SeedNotFoundError(error.args[0]) + +def get_seeds(expression: str) -> list: + """ + Return a list of all seed that contains the expression + """ + connection = get_connection() + cursor = get_cursor(connection) + return list(filter(lambda x: expression in x, get_all_seeds_names(cursor))) + +def get_user_id(email: str) -> str: + """ + Return the user_id of the user + """ + connection = get_connection() + cursor = get_cursor(connection) + if user_datastore.is_user_registered(cursor, email): + return user_datastore.get_user_id(cursor, email) + else : + raise UserNotFoundError("User not found") + +async def validate_user(cursor, email: str, connection_string) -> datastore.User: + """ + Return True if user is valid, False otherwise + """ + if user_datastore.is_user_registered(cursor, email): + user = datastore.get_User(email, cursor) + else : + user = await datastore.new_user(cursor, email, connection_string) + return user + + +async def get_picture_id(cursor, user_id, image_hash_value, container_client) : + """ + Return the picture_id of the image + """ + picture_id = await datastore.upload_picture(cursor, str(user_id), image_hash_value, container_client) + return picture_id + +def upload_picture_set(**kwargs): + connection = get_connection() + cursor = get_cursor(connection) + return datastore.bin.upload_picture_set.upload_picture_set(cursor, **kwargs) + +async def get_pipelines() -> list: + + """ + Retrieves the pipelines from the Datastore + """ + try: + connection = get_connection() + cursor = get_cursor(connection) + pipelines = await datastore.get_ml_structure(cursor) + return pipelines + except Exception as error: # TODO modify Exception for more specific exception + raise GetPipelinesError(error.args[0]) + +async def save_inference_result(cursor, 
user_id:str, inference_dict, picture_id:str, pipeline_id:str, type:int):
+ return await datastore.register_inference_result(cursor, user_id, inference_dict, picture_id, pipeline_id, type)
+
+async def save_perfect_feedback(inference_id:str, user_id:str):
+ # TODO: maybe derive the id here --> user_id = user.get_user_id(cursor, email) (for the case where we have the email and not the id directly)
+ connection = get_connection()
+ cursor = get_cursor(connection)
+ await datastore.register_perfect_inference_feeback(inference_id, user_id, cursor)
+
+async def save_annoted_feedback(inference_id:str, user_id:str, inference_feedback:dict):
+ # TODO: maybe derive the id here --> user_id = user.get_user_id(cursor, email) (for the case where we have the email and not the id directly)
+ connection = get_connection()
+ cursor = get_cursor(connection)
+ await datastore.register_annoted_inference_feeback(inference_id, user_id, inference_feedback, cursor)
diff --git a/tests/1310_1.png b/tests/img/1310_1.png
similarity index 100%
rename from tests/1310_1.png
rename to tests/img/1310_1.png
diff --git a/tests/test_azure_storage_api.py b/tests/test_azure_storage_api.py
index 53ac9de..6f0e72b 100644
--- a/tests/test_azure_storage_api.py
+++ b/tests/test_azure_storage_api.py
@@ -2,7 +2,7 @@
 import unittest
 import asyncio
 from unittest.mock import patch, Mock, MagicMock
-from azure_storage.azure_storage_api import (
+from storage.azure_storage_api import (
 mount_container,
 get_blob,
 get_pipeline_info,
diff --git a/tests/test_inference_request.py b/tests/test_inference_request.py
index 4b7f9b0..fcc7ef0 100644
--- a/tests/test_inference_request.py
+++ b/tests/test_inference_request.py
@@ -20,7 +20,7 @@ def setUp(self) -> None:
 )
 self.pipeline = json.loads(asyncio.run(response.get_data()))[0]
 current_dir = os.path.dirname(__file__)
- image_path = os.path.join(current_dir, '1310_1.png')
+ image_path = os.path.join(current_dir, 'img/1310_1.png')
 self.endpoints = "/model-endpoints-metadata"
 self.inference = "/inf"
 self.container_name = "bab1da84-5937-4016-965e-67e1ea6e29c4"
@@
-36,7 +36,7 @@ def tearDown(self) -> None: self.image_src = None self.test = None - @patch("azure_storage.azure_storage_api.mount_container") + @patch("storage.azure_storage_api.mount_container") def test_inference_request_successful(self, mock_container): # Mock azure client services mock_blob = Mock() @@ -78,6 +78,7 @@ def test_inference_request_successful(self, mock_container): "Access-Control-Allow-Origin": "*", }, json={ + "userId":"3e4d7d70-68d2-4302-a377-a869f1fd455e", "image": self.image_header + self.image_src, "imageDims": [720,540], "folder_name": self.folder_name, @@ -94,7 +95,7 @@ def test_inference_request_successful(self, mock_container): print(expected_keys == responses) self.assertEqual(responses, expected_keys) - @patch("azure_storage.azure_storage_api.mount_container") + @patch("storage.azure_storage_api.mount_container") def test_inference_request_unsuccessfull(self, mock_container): # Mock azure client services mock_blob = Mock() @@ -123,6 +124,7 @@ def test_inference_request_unsuccessfull(self, mock_container): "Access-Control-Allow-Origin": "*", }, json={ + "userId":"3e4d7d70-68d2-4302-a377-a869f1fd455e", "image": self.image_header, "imageDims": [720,540], "folder_name": self.folder_name, @@ -140,6 +142,7 @@ def test_inference_request_missing_argument(self): expected = ("InferenceRequestError: missing request arguments: either folder_name, container_name, imageDims or image is missing") data = { + "userId":"3e4d7d70-68d2-4302-a377-a869f1fd455e", "image": self.image_header, "imageDims": [720,540], "folder_name": self.folder_name, @@ -189,6 +192,7 @@ def test_inference_request_wrong_pipeline_name(self): "Access-Control-Allow-Origin": "*", }, json={ + "userId":"3e4d7d70-68d2-4302-a377-a869f1fd455e", "image": self.image_src, "imageDims": [720,540], "folder_name": self.folder_name,