diff --git a/README.md b/README.md
index d78e14d..8cd3534 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,11 @@ backend to function, you will need to add the missing values:
   accepted by the backend
 - **NACHET_VALID_DIMENSION**: Contains the valid dimensions for an image to be
   accepted in the backend.
+- **NACHET_MAX_CONTENT_LENGTH**: Sets the maximum size of a file that can be
+  uploaded to the backend. It must match the
+  `client_max_body_size`
+  [value](https://github.com/ai-cfia/howard/blob/dedee069f051ba743122084fcb5d5c97c2499359/kubernetes/aks/apps/nachet/base/nachet-ingress.yaml#L13)
+  set in the Nachet ingress deployment in Howard.
 
 #### DEPRECATED
 
diff --git a/app.py b/app.py
index 84f566c..294279d 100644
--- a/app.py
+++ b/app.py
@@ -72,19 +72,6 @@ class MaxContentLengthWarning(APIWarnings):
 
 NACHET_DATA = os.getenv("NACHET_DATA")
 
-Model = namedtuple(
-    'Model',
-    [
-        'entry_function',
-        'name',
-        'endpoint',
-        'api_key',
-        'inference_function',
-        'content_type',
-        'deployment_platform',
-    ]
-)
-
 try:
     VALID_EXTENSION = json.loads(os.getenv("NACHET_VALID_EXTENSION"))
     VALID_DIMENSION = json.loads(os.getenv("NACHET_VALID_DIMENSION"))
@@ -115,6 +102,7 @@ class MaxContentLengthWarning(APIWarnings):
     [
         'request_function',
         'name',
+        'version',
         'endpoint',
         'api_key',
         'content_type',
@@ -371,7 +359,9 @@ async def inference_request():
             container_client, folder_name, image_bytes, hash_value
         )
 
-        for idx, model in enumerate(pipelines_endpoints.get(pipeline_name)):
+        pipeline = pipelines_endpoints.get(pipeline_name)
+
+        for idx, model in enumerate(pipeline):
             print(f"Entering {model.name.upper()} model") # TODO: Transform into logging
             result_json = await model.request_function(model, cache_json_result[idx])
             cache_json_result.append(result_json)
@@ -383,7 +373,7 @@ async def inference_request():
             cache_json_result[-1], imageDims, area_ratio, color_format
         )
 
-        result_json_string = json.dumps(processed_result_json)
+        result_json_string = await record_model(pipeline, processed_result_json)
 
         # upload the inference results to the user's container as async task
         app.add_background_task(
@@ -455,6 +445,7 @@ async def test():
     m = Model(
         request_function["test"],
         "test_model1",
+        1,
         "http://localhost:8080/test_model1",
         "test_api_key",
         "application/json",
@@ -465,6 +456,12 @@ async def test():
 
     return CACHE["endpoints"], 200
 
+
+async def record_model(pipeline: namedtuple, result: list):
+    new_entry = [{"name": model.name, "version": model.version} for model in pipeline]
+    result[0]["models"] = new_entry
+    return json.dumps(result, indent=4)
+
 
 async def fetch_json(repo_URL, key, file_path):
     """
@@ -505,6 +502,7 @@ async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version,
             m = Model(
                 request_function.get(model.get("endpoint_name")),
                 model.get("model_name"),
+                model.get("version"),
                 # To protect sensible data (API key and model endpoint), we encrypt it when
                 # it's pushed into the blob storage. Once we retrieve the data here in the
                 # backend, we need to decrypt the byte format to recover the original
diff --git a/tests/test_inference_request.py b/tests/test_inference_request.py
index 8fac0d6..4b7f9b0 100644
--- a/tests/test_inference_request.py
+++ b/tests/test_inference_request.py
@@ -65,7 +65,8 @@ def test_inference_request_successful(self, mock_container):
             "score",
             "topN",
             "overlapping",
-            "overlappingIndices"
+            "overlappingIndices",
+            "models"
         }
 
         # Test the answers from inference_request
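
For reviewers, here is a minimal, self-contained sketch of what the new `record_model` helper does to the processed inference result. It is not part of the patch: `ModelInfo`, the model names, and the placeholder result below are made-up stand-ins for the real `Model` namedtuple and pipeline output, reduced to the `name` and `version` fields that `record_model` actually reads.

```python
import asyncio
import json
from collections import namedtuple

# Simplified stand-in for the Model namedtuple in app.py; record_model only
# touches the `name` and `version` fields, so the other fields are omitted.
ModelInfo = namedtuple("ModelInfo", ["name", "version"])


async def record_model(pipeline, result: list) -> str:
    # Same logic as the helper added in app.py: attach each pipeline model's
    # name and version to the first element of the processed result, then
    # serialize the whole result to a JSON string.
    new_entry = [{"name": model.name, "version": model.version} for model in pipeline]
    result[0]["models"] = new_entry
    return json.dumps(result, indent=4)


async def demo() -> None:
    # Hypothetical two-model pipeline and placeholder inference output.
    pipeline = [ModelInfo("seed_detector", 1), ModelInfo("swin_classifier", 2)]
    processed_result = [{"boxes": [], "totalBoxes": 0}]
    print(await record_model(pipeline, processed_result))


asyncio.run(demo())
```

The updated test in `tests/test_inference_request.py` reflects the same change: the successful inference response is now expected to carry a `models` key alongside the existing fields.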