Commit
fixes #60: Move pipeline related files into pipelines, and modify template value for model and pipeline
Maxence Guindon committed Apr 15, 2024
1 parent 7df50f0 commit 9124f99
Showing 6 changed files with 165 additions and 85 deletions.
app.py (2 changes: 1 addition & 1 deletion)
@@ -503,7 +503,7 @@ async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version,
     models = ()
     for model in result_json.get("models"):
         m = Model(
-            request_function.get(model.get("api_call_function")),
+            request_function.get(model.get("endpoint_name")),
             model.get("model_name"),
             # To protect sensitive data (API key and model endpoint), we encrypt it when
             # it's pushed into the blob storage. Once we retrieve the data here in the
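To make the effect of this one-line change concrete, here is a hedged sketch of the lookup it performs: the inference callable for each model is now fetched from `request_function` (defined in `model/__init__.py` below) using the model's `endpoint_name` rather than a separate `api_call_function` key. The helper name and error handling are illustrative, not part of app.py.

```python
# Hedged sketch, not the actual get_pipelines() body from app.py.
# `request_function` is the name-to-callable registry from model/__init__.py;
# `result_json` is the parsed pipeline document described in the docs below.

def resolve_inference_functions(result_json: dict, request_function: dict) -> list:
    """Pair each model's name with its inference callable, keyed by endpoint_name."""
    resolved = []
    for model in result_json.get("models", []):
        # Before this commit the registry key came from model["api_call_function"];
        # the endpoint_name now doubles as the registry key.
        func = request_function.get(model.get("endpoint_name"))
        if func is None:
            raise KeyError(f"no inference function registered for {model.get('endpoint_name')!r}")
        resolved.append((model.get("model_name"), func))
    return resolved
```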
docs/nachet-model-documentation.md (129 changes: 121 additions & 8 deletions)
@@ -176,17 +176,14 @@ pipelines:
     version:
     description:
     job_name:
-    dataset:
-    metrics:
-    identifiable:
+    dataset_description:
+    accuracy:
     default:
 
 models:
   - task:
-    api_call_function:
     endpoint:
     api_key:
-    inference_function:
     content_type:
     deployment_platform:
     endpoint_name:
@@ -196,7 +193,123 @@ models:
     version:
     description:
     job_name:
-    dataset:
-    metrics:
-    identifiable:
+    dataset_description:
+    accuracy:
```
### Key Description
#### File Specific Keys
|Key|Description|Expected Value Format|
|--|--|--|
|version|The version of the file|0.0.0|
|date|The date the file was uploaded|202x-mm-dd|
|pipelines|A list of available pipelines||
|models|A list of available models||
#### Pipeline Specific Keys
|Key|Description|Expected Value Format|
|--|--|--|
|models|A list of the model names used by the pipeline, in order|["that_model_name", "other_model_name"]|
|pipeline_name|The pipeline name|"First Pipeline"|
|created_by|The creator of the pipeline|"Avery GoodDataScientist"|
|creation_date|The creation date of the pipeline|"2024-01-01"|
|version|The version of the pipeline|1|
|description|The pipeline's description|"Pipeline Description"|
|job_name|The pipeline job name|"Job Name"|
|dataset_description|A brief description of the dataset|"Dataset Description"|
|Accuracy|The prediction accuracy of the pipeline|0.8302|
|default|Determines whether the pipeline is the default one|true or false|
#### Model Specific Keys
|Key|Description|Expected Value Format|
|--|--|--|
|task|The model task|"object-detection", "classification" or "segmentation"|
|endpoint|The model endpoint|"https://that-model.inference.ml.com/score"|
|api_key|Secret key to access the API|"SeCRetKeys"|
|content_type|The content type the model can process|"application/json"|
|deployment_platform|The platform where the model is hosted|"azure"|
|endpoint_name|The model endpoint name|"that-model-endpoint"|
|model_name|The name of the model|"that_model_name"|
|created_by|The creator of the model|"Avery GoodDataScientist"|
|creation_date|The creation date of the model|"2024-03-18"|
|version|The version of the model|1|
|description|The description of the model|"Model Description"|
|job_name|The job name of the model|"Job Name"|
|dataset_description|A brief description of the dataset|"Dataset Description"|
|Accuracy|The prediction accuracy of the model|0.9205|
#### JSON Representation and Example
This is how the file is represented in the datastore.
```json
{
    "version": "0.1.0",
    "date": "2024-02-26",
    "pipelines": [
        {
            "models": ["that_model_name", "other_model_name"],
            "pipeline_name": "First Pipeline",
            "created_by": "Avery GoodDataScientist",
            "creation_date": "2024-01-01",
            "version": 1,
            "description": "Pipeline Description",
            "job_name": "Job Name",
            "dataset_description": "Dataset Description",
            "Accuracy": 0.8302,
            "default": true
        },
        {
            "models": ["that_model_name"],
            "pipeline_name": "Second Pipeline",
            "created_by": "Avery GoodDataScientist",
            "creation_date": "2024-01-02",
            "version": 2,
            "description": "Pipeline Description",
            "job_name": "Job Name",
            "dataset_description": "Dataset Description",
            "Accuracy": 0.7989,
            "default": false
        }
    ],
    "models": [
        {
            "task": "classification",
            "endpoint": "https://that-model.inference.ml.com/score",
            "api_key": "SeCRetKeys",
            "content_type": "application/json",
            "deployment_platform": "azure",
            "endpoint_name": "that-model-endpoint",
            "model_name": "that_model_name",
            "created_by": "Avery GoodDataScientist",
            "creation_date": "2023-12-02",
            "version": 5,
            "description": "Model Description",
            "job_name": "Job Name",
            "dataset_description": "Dataset Description",
            "Accuracy": 0.6908
        },
        {
            "task": "object-detection",
            "endpoint": "https://other-model.inference.ml.com/score",
            "api_key": "SeCRetKeys",
            "content_type": "application/json",
            "deployment_platform": "aws",
            "endpoint_name": "other-model-endpoint",
            "model_name": "other_model_name",
            "created_by": "Avery GoodDataScientist",
            "creation_date": "2023-11-25",
            "version": 3,
            "description": "Model Description",
            "job_name": "Job Name",
            "dataset_description": "Dataset Description",
            "Accuracy": 0.9205
        }
    ]
}
```
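To illustrate how a client could consume this document, the sketch below picks the pipeline flagged `default: true` and resolves its model names against the top-level `models` list. The function names and the file path are assumptions for the example, not part of the Nachet API.

```python
import json

def select_default_pipeline(document: dict) -> dict:
    """Return the pipeline flagged as default (assumes exactly one is flagged)."""
    return next(p for p in document["pipelines"] if p.get("default"))

def resolve_models(document: dict, pipeline: dict) -> list:
    """Look up each model name referenced by the pipeline, preserving order."""
    by_name = {m["model_name"]: m for m in document["models"]}
    return [by_name[name] for name in pipeline["models"]]

# Usage sketch with a hypothetical local copy of the datastore file.
with open("pipelines.json") as f:
    doc = json.load(f)
pipeline = select_default_pipeline(doc)
print([m["endpoint_name"] for m in resolve_models(doc, pipeline)])
```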
model/__init__.py (6 changes: 3 additions & 3 deletions)
@@ -4,8 +4,8 @@
 from model.six_seeds import request_inference_from_nachet_6seeds
 
 request_function = {
-    "swin": request_inference_from_swin,
-    "seed_detector": request_inference_from_seed_detector,
+    "swin-endpoint": request_inference_from_swin,
+    "seed-detector": request_inference_from_seed_detector,
     "test": request_inference_from_test,
-    "nachet_6seeds": request_inference_from_nachet_6seeds
+    "nachet-6seeds": request_inference_from_nachet_6seeds
 }
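Because the registry keys now have to match the `endpoint_name` values stored in the pipeline document, a startup-time consistency check is a cheap safeguard against renames drifting out of sync. A minimal sketch, assuming `doc` is the parsed pipeline document from the docs above; the helper itself is not part of this module.

```python
def check_registry_coverage(document: dict, registry: dict) -> list:
    """Return the endpoint_names that have no registered inference function."""
    return [
        m["endpoint_name"]
        for m in document.get("models", [])
        if m["endpoint_name"] not in registry
    ]

# Usage sketch: fail fast at startup instead of on the first inference call.
missing = check_registry_coverage(doc, request_function)  # `doc` parsed elsewhere
if missing:
    raise RuntimeError(f"no inference function registered for: {missing}")
```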
pipeline_template.yaml → pipelines/pipeline_template.yaml (13 changes: 4 additions & 9 deletions)
@@ -5,24 +5,20 @@ date:

 pipelines:
   - models:
-    model_name:
     pipeline_name:
     created_by:
     creation_date:
     version:
     description:
     job_name:
-    dataset:
-    metrics:
-    identifiable:
+    dataset_description:
+    accuracy:
     default:

 models:
   - tasks:
-    api_call_function:
     endpoint:
     api_key:
-    inference_function:
     content_type:
     deployment_platform:
     endpoint_name:
@@ -32,6 +28,5 @@ models:
     version:
     description:
     job_name:
-    dataset:
-    metrics:
-    identifiable:
+    dataset_description:
+    accuracy:
pipeline_version_insertion.py → pipelines/pipeline_version_insertion.py
@@ -11,7 +11,7 @@
 2. Call the `pipeline_version_insertion.py` file with the path to the file
 containing the pipeline data as an argument directly in the terminal.
 
-Example: pipeline_version_insertion.py /path/to/pipeline.yaml
+Example: pipelines/pipeline_version_insertion.py /path/to/pipeline.yaml
 
 3. The function will read the file, encrypt the endpoint and API key, and
 upload the pipeline to the specified Azure Blob Storage container.
@@ -63,37 +63,16 @@ class PipelineInsertionError(Exception):
     pass
 
 
-class Data(BaseModel):
-    version: str
-    date: datetime.date
-    pipelines: list
-    models: list
-
-    @field_validator ("pipelines", mode="before", check_fields=True)
-    def validate_pipelines(cls, v):
-        for p in v:
-            Pipeline(**p)
-        return v
-
-    @field_validator ("models", mode="before", check_fields=True)
-    def validate_models(cls, v):
-        for m in v:
-            Model(**m)
-        return v
-
-
 class Pipeline(BaseModel):
     models: list
-    model_name: str
     pipeline_name: str
     created_by: str
     creation_date: str
     version: int
     description: str
     job_name: str
-    dataset: str
-    metrics: list
-    identifiable: list
+    dataset_description: str
+    accuracy: float
     default: bool
 
     @field_validator ("*", mode="before", check_fields=True)
@@ -102,7 +81,7 @@ def validate_data(cls, v):
return ""
return v

@field_validator ("metrics", "identifiable", mode="before", check_fields=True)
@field_validator ("models", mode="before", check_fields=True)
def validate_list(cls, v):
if v is None:
return []
@@ -111,12 +90,11 @@ def validate_list(cls, v):
     class Config:
         protected_namespaces = ()
 
 
 class Model(BaseModel):
     task: str
-    api_call_function: str
     endpoint: str
     api_key: str
-    inference_function: str
     content_type: str
     deployment_platform: str
     endpoint_name: str
@@ -126,22 +104,15 @@ class Model(BaseModel):
     version: int
     description: str
     job_name: str
-    dataset: str
-    metrics: list
-    identifiable: list
+    dataset_description: str
+    accuracy: float
 
     @field_validator ("*", mode="before", check_fields=True)
     def validate_data(cls, v):
         if v is None:
             return ""
         return v
 
-    @field_validator ("metrics", "identifiable", mode="before", check_fields=True)
-    def validate_list(cls, v):
-        if v is None:
-            return []
-        return v
-
     @field_validator ("deployment_platform", mode="before", check_fields=True)
     def validate_dict(cls, v):
         if v is None:
@@ -152,6 +123,25 @@ class Config:
         protected_namespaces = ()
 
 
+class Data(BaseModel):
+    version: str
+    date: datetime.date
+    pipelines: list
+    models: list
+
+    @field_validator ("pipelines", mode="before", check_fields=True)
+    def validate_pipelines(cls, v):
+        for p in v:
+            Pipeline(**p)
+        return v
+
+    @field_validator ("models", mode="before", check_fields=True)
+    def validate_models(cls, v):
+        for m in v:
+            Model(**m)
+        return v
+
+
 def insert_new_version_pipeline(
     pipeline: dict,
     blob_service_client: BlobServiceClient,
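To see the validators in action, here is a minimal usage sketch, assuming PyYAML is installed and a filled-in copy of the template above; the file path is illustrative. Because `Data`'s `mode="before"` validators instantiate `Pipeline(**p)` and `Model(**m)` for every entry, a missing or mistyped key surfaces as a pydantic `ValidationError` at load time rather than during an upload.

```python
import yaml  # PyYAML, assumed available alongside pydantic in this project

with open("pipelines/my_pipeline.yaml") as f:  # illustrative path
    content = yaml.safe_load(f)

# Raises pydantic.ValidationError if any pipeline or model entry is malformed.
validated = Data(**content)
print(validated.version, len(validated.pipelines), len(validated.models))
```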