Commit
fixes #60: Move pipeline related files into pipelines, and modify template value for model and pipeline
Maxence Guindon committed Apr 15, 2024
1 parent 7df50f0 commit 9124f99
Showing 6 changed files with 165 additions and 85 deletions.
app.py (2 changes: 1 addition & 1 deletion)
@@ -503,7 +503,7 @@ async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version,
     models = ()
     for model in result_json.get("models"):
         m = Model(
-            request_function.get(model.get("api_call_function")),
+            request_function.get(model.get("endpoint_name")),
             model.get("model_name"),
             # To protect sensitive data (API key and model endpoint), we encrypt it when
             # it's pushed into the blob storage. Once we retrieve the data here in the
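To make the effect of this one-line change concrete, here is a hedged sketch of the lookup it performs: the inference callable for each model is now fetched from `request_function` (defined in `model/__init__.py` below) using the model's `endpoint_name` rather than a separate `api_call_function` key. The helper name and error handling are illustrative, not part of app.py.

```python
# Hedged sketch, not the actual get_pipelines() body from app.py.
# `request_function` is the name-to-callable registry from model/__init__.py;
# `result_json` is the parsed pipeline document described in the docs below.

def resolve_inference_functions(result_json: dict, request_function: dict) -> list:
    """Pair each model's name with its inference callable, keyed by endpoint_name."""
    resolved = []
    for model in result_json.get("models", []):
        # Before this commit the registry key came from model["api_call_function"];
        # the endpoint_name now doubles as the registry key.
        func = request_function.get(model.get("endpoint_name"))
        if func is None:
            raise KeyError(f"no inference function registered for {model.get('endpoint_name')!r}")
        resolved.append((model.get("model_name"), func))
    return resolved
```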
docs/nachet-model-documentation.md (129 changes: 121 additions & 8 deletions)
@@ -176,17 +176,14 @@ pipelines:
     version:
     description:
     job_name:
-    dataset:
-    metrics:
-    identifiable:
+    dataset_description:
+    accuracy:
     default:
 
 models:
   - task:
-    api_call_function:
     endpoint:
     api_key:
-    inference_function:
     content_type:
     deployment_platform:
     endpoint_name:
@@ -196,7 +193,123 @@ models:
     version:
     description:
     job_name:
-    dataset:
-    metrics:
-    identifiable:
+    dataset_description:
+    accuracy:
```
### Key Description
#### File Specific Keys
|Key|Description|Expected Value Format|
|--|--|--|
|version|The version of the file|0.0.0|
|date|The date the file was uploaded|202x-mm-dd|
|pipelines|A list of available pipelines||
|models|A list of available models||
#### Pipeline Specific Keys
|Key|Description|Expected Value Format|
|--|--|--|
|models|A list of the model names used by the pipeline, in order|["that_model_name", "other_model_name"]|
|pipeline_name|The pipeline name|"First Pipeline"|
|created_by|The creator of the pipeline|"Avery GoodDataScientist"|
|creation_date|The creation date of the pipeline|"2024-01-01"|
|version|The version of the pipeline|1|
|description|The pipeline's description|"Pipeline Description"|
|job_name|The pipeline job name|"Job Name"|
|dataset_description|A brief description of the dataset|"Dataset Description"|
|Accuracy|The prediction accuracy of the pipeline|0.8302|
|default|Determines whether the pipeline is the default one|true or false|
#### Model Specific Keys
|Key|Description|Expected Value Format|
|--|--|--|
|task|The model task|"object-detection", "classification" or "segmentation"|
|endpoint|The model endpoint|"https://that-model.inference.ml.com/score"|
|api_key|Secret key to access the API|"SeCRetKeys"|
|content_type|The content type the model can process|"application/json"|
|deployment_platform|The platform where the model is hosted|"azure"|
|endpoint_name|The model endpoint name|"that-model-endpoint"|
|model_name|The name of the model|"that_model_name"|
|created_by|The creator of the model|"Avery GoodDataScientist"|
|creation_date|The creation date of the model|"2024-03-18"|
|version|The version of the model|1|
|description|The description of the model|"Model Description"|
|job_name|The job name of the model|"Job Name"|
|dataset_description|A brief description of the dataset|"Dataset Description"|
|Accuracy|The prediction accuracy of the model|0.9205|
#### JSON Representation and Example
This is how the file is represented in the datastore.
```json
{
    "version": "0.1.0",
    "date": "2024-02-26",
    "pipelines": [
        {
            "models": ["that_model_name", "other_model_name"],
            "pipeline_name": "First Pipeline",
            "created_by": "Avery GoodDataScientist",
            "creation_date": "2024-01-01",
            "version": 1,
            "description": "Pipeline Description",
            "job_name": "Job Name",
            "dataset_description": "Dataset Description",
            "Accuracy": 0.8302,
            "default": true
        },
        {
            "models": ["that_model_name"],
            "pipeline_name": "Second Pipeline",
            "created_by": "Avery GoodDataScientist",
            "creation_date": "2024-01-02",
            "version": 2,
            "description": "Pipeline Description",
            "job_name": "Job Name",
            "dataset_description": "Dataset Description",
            "Accuracy": 0.7989,
            "default": false
        }
    ],
    "models": [
        {
            "task": "classification",
            "endpoint": "https://that-model.inference.ml.com/score",
            "api_key": "SeCRetKeys",
            "content_type": "application/json",
            "deployment_platform": "azure",
            "endpoint_name": "that-model-endpoint",
            "model_name": "that_model_name",
            "created_by": "Avery GoodDataScientist",
            "creation_date": "2023-12-02",
            "version": 5,
            "description": "Model Description",
            "job_name": "Job Name",
            "dataset_description": "Dataset Description",
            "Accuracy": 0.6908
        },
        {
            "task": "object-detection",
            "endpoint": "https://other-model.inference.ml.com/score",
            "api_key": "SeCRetKeys",
            "content_type": "application/json",
            "deployment_platform": "aws",
            "endpoint_name": "other-model-endpoint",
            "model_name": "other_model_name",
            "created_by": "Avery GoodDataScientist",
            "creation_date": "2023-11-25",
            "version": 3,
            "description": "Model Description",
            "job_name": "Job Name",
            "dataset_description": "Dataset Description",
            "Accuracy": 0.9205
        }
    ]
}
```
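To illustrate how a client could consume this document, the sketch below picks the pipeline flagged `default: true` and resolves its model names against the top-level `models` list. The function names and the file path are assumptions for the example, not part of the Nachet API.

```python
import json

def select_default_pipeline(document: dict) -> dict:
    """Return the pipeline flagged as default (assumes exactly one is flagged)."""
    return next(p for p in document["pipelines"] if p.get("default"))

def resolve_models(document: dict, pipeline: dict) -> list:
    """Look up each model name referenced by the pipeline, preserving order."""
    by_name = {m["model_name"]: m for m in document["models"]}
    return [by_name[name] for name in pipeline["models"]]

# Usage sketch with a hypothetical local copy of the datastore file.
with open("pipelines.json") as f:
    doc = json.load(f)
pipeline = select_default_pipeline(doc)
print([m["endpoint_name"] for m in resolve_models(doc, pipeline)])
```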
model/__init__.py (6 changes: 3 additions & 3 deletions)
@@ -4,8 +4,8 @@
 from model.six_seeds import request_inference_from_nachet_6seeds
 
 request_function = {
-    "swin": request_inference_from_swin,
-    "seed_detector": request_inference_from_seed_detector,
+    "swin-endpoint": request_inference_from_swin,
+    "seed-detector": request_inference_from_seed_detector,
     "test": request_inference_from_test,
-    "nachet_6seeds": request_inference_from_nachet_6seeds
+    "nachet-6seeds": request_inference_from_nachet_6seeds
 }
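Because the registry keys now have to match the `endpoint_name` values stored in the pipeline document, a startup-time consistency check is a cheap safeguard against renames drifting out of sync. A minimal sketch, assuming `doc` is the parsed pipeline document from the docs above; the helper itself is not part of this module.

```python
def check_registry_coverage(document: dict, registry: dict) -> list:
    """Return the endpoint_names that have no registered inference function."""
    return [
        m["endpoint_name"]
        for m in document.get("models", [])
        if m["endpoint_name"] not in registry
    ]

# Usage sketch: fail fast at startup instead of on the first inference call.
missing = check_registry_coverage(doc, request_function)  # `doc` parsed elsewhere
if missing:
    raise RuntimeError(f"no inference function registered for: {missing}")
```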
pipeline_template.yaml → pipelines/pipeline_template.yaml (13 changes: 4 additions & 9 deletions)
@@ -5,24 +5,20 @@ date:

 pipelines:
   - models:
-    model_name:
     pipeline_name:
     created_by:
     creation_date:
     version:
     description:
     job_name:
-    dataset:
-    metrics:
-    identifiable:
+    dataset_description:
+    accuracy:
     default:

 models:
   - tasks:
-    api_call_function:
     endpoint:
     api_key:
-    inference_function:
     content_type:
     deployment_platform:
     endpoint_name:
@@ -32,6 +28,5 @@ models:
     version:
     description:
     job_name:
-    dataset:
-    metrics:
-    identifiable:
+    dataset_description:
+    accuracy:
pipeline_version_insertion.py → pipelines/pipeline_version_insertion.py
@@ -11,7 +11,7 @@
 2. Call the `pipeline_version_insertion.py` file with the path to the file
 containing the pipeline data as an argument directly in the terminal.
 
-Example: pipeline_version_insertion.py /path/to/pipeline.yaml
+Example: pipelines/pipeline_version_insertion.py /path/to/pipeline.yaml
 
 3. The function will read the file, encrypt the endpoint and API key, and
 upload the pipeline to the specified Azure Blob Storage container.
@@ -63,37 +63,16 @@ class PipelineInsertionError(Exception):
     pass
 
 
-class Data(BaseModel):
-    version: str
-    date: datetime.date
-    pipelines: list
-    models: list
-
-    @field_validator ("pipelines", mode="before", check_fields=True)
-    def validate_pipelines(cls, v):
-        for p in v:
-            Pipeline(**p)
-        return v
-
-    @field_validator ("models", mode="before", check_fields=True)
-    def validate_models(cls, v):
-        for m in v:
-            Model(**m)
-        return v
-
-
 class Pipeline(BaseModel):
     models: list
-    model_name: str
     pipeline_name: str
     created_by: str
     creation_date: str
     version: int
     description: str
     job_name: str
-    dataset: str
-    metrics: list
-    identifiable: list
+    dataset_description: str
+    accuracy: float
     default: bool
 
     @field_validator ("*", mode="before", check_fields=True)
@@ -102,7 +81,7 @@ def validate_data(cls, v):
return ""
return v

@field_validator ("metrics", "identifiable", mode="before", check_fields=True)
@field_validator ("models", mode="before", check_fields=True)
def validate_list(cls, v):
if v is None:
return []
@@ -111,12 +90,11 @@ def validate_list(cls, v):
     class Config:
         protected_namespaces = ()
 
 
 class Model(BaseModel):
     task: str
-    api_call_function: str
     endpoint: str
     api_key: str
-    inference_function: str
     content_type: str
     deployment_platform: str
     endpoint_name: str
@@ -126,22 +104,15 @@ class Model(BaseModel):
     version: int
     description: str
     job_name: str
-    dataset: str
-    metrics: list
-    identifiable: list
+    dataset_description: str
+    accuracy: float
 
     @field_validator ("*", mode="before", check_fields=True)
     def validate_data(cls, v):
         if v is None:
             return ""
         return v
 
-    @field_validator ("metrics", "identifiable", mode="before", check_fields=True)
-    def validate_list(cls, v):
-        if v is None:
-            return []
-        return v
-
     @field_validator ("deployment_platform", mode="before", check_fields=True)
     def validate_dict(cls, v):
         if v is None:
@@ -152,6 +123,25 @@ class Config:
         protected_namespaces = ()
 
 
+class Data(BaseModel):
+    version: str
+    date: datetime.date
+    pipelines: list
+    models: list
+
+    @field_validator ("pipelines", mode="before", check_fields=True)
+    def validate_pipelines(cls, v):
+        for p in v:
+            Pipeline(**p)
+        return v
+
+    @field_validator ("models", mode="before", check_fields=True)
+    def validate_models(cls, v):
+        for m in v:
+            Model(**m)
+        return v
+
+
 def insert_new_version_pipeline(
     pipeline: dict,
     blob_service_client: BlobServiceClient,
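To see the validators in action, here is a minimal usage sketch, assuming PyYAML is installed and a filled-in copy of the template above; the file path is illustrative. Because `Data`'s `mode="before"` validators instantiate `Pipeline(**p)` and `Model(**m)` for every entry, a missing or mistyped key surfaces as a pydantic `ValidationError` at load time rather than during an upload.

```python
import yaml  # PyYAML, assumed available alongside pydantic in this project

with open("pipelines/my_pipeline.yaml") as f:  # illustrative path
    content = yaml.safe_load(f)

# Raises pydantic.ValidationError if any pipeline or model entry is malformed.
validated = Data(**content)
print(validated.version, len(validated.pipelines), len(validated.models))
```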