feat: add percentages dto and endpoint (#177)

* feat: added last_n percentages endpoint and percentages DTO
* feat: merged main into branch
* feat: alembic migration
* feat: updated fastapi
* feat: fixed orderby and limit and alembic
radicalbit · Oct 17, 2024 · 5e61425 · 5e61425
1 parent 9a66dc7
commit 5e61425
Show file tree

Hide file tree

Showing 16 changed files with 944 additions and 1,030 deletions.
diff --git a/api/alembic/env.py b/api/alembic/env.py
@@ -70,9 +70,12 @@ def run_migrations_offline() -> None:
         include_name=include_name
     )
 
+    # target_metadata.schema is None when the default schema is used (Alembic needs None for it); fall back to 'public', the Postgres default, in that case
+    target_schema = 'public' if target_metadata.schema is None else target_metadata.schema
+
     with context.begin_transaction():
-        context.execute(f'create schema if not exists "{target_metadata.schema}";')
-        context.execute(f'set search_path to "{target_metadata.schema}"')
+        context.execute(f'create schema if not exists "{target_schema}";')
+        context.execute(f'set search_path to "{target_schema}"')
         context.run_migrations()
 
 
@@ -99,9 +102,12 @@ def run_migrations_online() -> None:
             include_name=include_name
         )
 
+        # target_metadata.schema is None when the default schema is used (Alembic needs None for it); fall back to 'public', the Postgres default, in that case
+        target_schema = 'public' if target_metadata.schema is None else target_metadata.schema
+
         with context.begin_transaction():
-            context.execute(f'create schema if not exists "{target_metadata.schema}";')
-            context.execute(f'set search_path to "{target_metadata.schema}"')
+            context.execute(f'create schema if not exists "{target_schema}";')
+            context.execute(f'set search_path to "{target_schema}"')
             context.run_migrations()
 
 

diff --git a/api/alembic/versions/dccb82489f4d_add_percentage_column.py b/api/alembic/versions/dccb82489f4d_add_percentage_column.py
@@ -0,0 +1,30 @@
+"""add percentage column
+
+Revision ID: dccb82489f4d
+Revises: 6edab3f23907
+Create Date: 2024-10-17 09:03:48.063883
+
+"""
+from typing import Sequence, Union, Text
+
+from alembic import op
+import sqlalchemy as sa
+from app.db.tables.commons.json_encoded_dict import JSONEncodedDict
+
+# revision identifiers, used by Alembic.
+revision: str = 'dccb82489f4d'
+down_revision: Union[str, None] = '6edab3f23907'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add the nullable PERCENTAGES column to current_dataset_metrics."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    # astext_type must be a SQLAlchemy type: use sa.Text(), not typing.Text,
+    # which is an alias of str (so Text() evaluates to the empty string).
+    op.add_column(
+        'current_dataset_metrics',
+        sa.Column(
+            'PERCENTAGES',
+            JSONEncodedDict(astext_type=sa.Text()),
+            nullable=True,
+        ),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop the PERCENTAGES column from current_dataset_metrics."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('current_dataset_metrics', 'PERCENTAGES')
+    # ### end Alembic commands ###
diff --git a/api/app/db/dao/model_dao.py b/api/app/db/dao/model_dao.py
@@ -6,10 +6,12 @@
 from fastapi_pagination import Page, Params
 from fastapi_pagination.ext.sqlalchemy import paginate
 import sqlalchemy
-from sqlalchemy import asc, desc
+from sqlalchemy import asc, desc, func
 from sqlalchemy.future import select as future_select
 
 from app.db.database import Database
+from app.db.tables.current_dataset_metrics_table import CurrentDatasetMetrics
+from app.db.tables.current_dataset_table import CurrentDataset
 from app.db.tables.model_table import Model
 from app.models.model_order import OrderType
 
@@ -57,6 +59,36 @@ def get_all(
         with self.db.begin_session() as session:
             return session.query(Model).where(Model.deleted.is_(False))
 
+    def get_last_n_percentages(self, n_models: int):
+        """Return the last-updated models with their latest current-dataset metrics.
+
+        For every model, the current dataset with the greatest ``date`` is
+        selected and joined to its metrics row. Soft-deleted models are
+        excluded. Rows are ordered by ``Model.updated_at`` descending and
+        capped at ``n_models``.
+
+        :param n_models: maximum number of rows to return
+        :return: list of ``(Model, CurrentDatasetMetrics)`` rows
+        """
+        with self.db.begin_session() as session:
+            # Greatest current-dataset date per model.
+            subq = (
+                session.query(
+                    CurrentDataset.model_uuid,
+                    func.max(CurrentDataset.date).label('maxdate'),
+                )
+                .group_by(CurrentDataset.model_uuid)
+                .subquery()
+            )
+            return (
+                session.query(Model, CurrentDatasetMetrics)
+                .join(
+                    CurrentDataset,
+                    CurrentDataset.model_uuid == Model.uuid,
+                )
+                # Keep only the dataset whose date equals the per-model maximum.
+                .join(
+                    subq,
+                    (CurrentDataset.model_uuid == subq.c.model_uuid)
+                    & (CurrentDataset.date == subq.c.maxdate),
+                )
+                .join(
+                    CurrentDatasetMetrics,
+                    CurrentDatasetMetrics.current_uuid == CurrentDataset.uuid,
+                )
+                # Skip soft-deleted models, as get_all does.
+                .where(Model.deleted.is_(False))
+                .order_by(Model.updated_at.desc())
+                .limit(n_models)
+                .all()
+            )
+
     def get_all_paginated(
         self,
         params: Params = Params(),

diff --git a/api/app/db/database.py b/api/app/db/database.py
@@ -27,13 +27,13 @@ class Reflected(DeferredReflection):
 
 # https://github.com/sqlalchemy/alembic/discussions/1351
 # If the schema is the default, Alembic needs None otherwise migrations are messed up
-schema_name = (
+fixed_schema = (
     None
     if get_config().db_config.db_schema == 'public'
     else get_config().db_config.db_schema
 )
 BaseTable = declarative_base(
-    metadata=MetaData(schema=schema_name, naming_convention=naming_convention)
+    metadata=MetaData(schema=fixed_schema, naming_convention=naming_convention)
 )
 
 

diff --git a/api/app/db/tables/current_dataset_metrics_table.py b/api/app/db/tables/current_dataset_metrics_table.py
@@ -27,3 +27,4 @@ class CurrentDatasetMetrics(Reflected, BaseTable, BaseDAO):
     data_quality = Column('DATA_QUALITY', JSONEncodedDict, nullable=True)
     drift = Column('DRIFT', JSONEncodedDict, nullable=True)
     statistics = Column('STATISTICS', JSONEncodedDict, nullable=True)
+    percentages = Column('PERCENTAGES', JSONEncodedDict, nullable=True)
diff --git a/api/app/models/metrics/percentages_dto.py b/api/app/models/metrics/percentages_dto.py
@@ -0,0 +1,61 @@
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel, ConfigDict
+from pydantic.alias_generators import to_camel
+
+from app.models.job_status import JobStatus
+
+
+class DetailPercentage(BaseModel):
+    # One entry of MetricPercentage.details: a score attached to a named feature.
+    feature_name: str
+    score: float
+
+
+class MetricPercentage(BaseModel):
+    # Value of a single metric plus an optional list of per-feature details.
+    value: float
+    # Declared Optional so the annotation agrees with the None default and an
+    # explicit null is accepted on input instead of failing validation.
+    details: Optional[List[Optional[DetailPercentage]]] = None
+
+
+class Percentages(BaseModel):
+    # One MetricPercentage for each of: data quality, model quality and drift.
+    data_quality: MetricPercentage
+    model_quality: MetricPercentage
+    drift: MetricPercentage
+
+    # Accept both snake_case field names and their camelCase aliases. Clearing
+    # protected_namespaces lets `model_quality` use the `model_` prefix without
+    # a pydantic warning.
+    model_config = ConfigDict(
+        populate_by_name=True, alias_generator=to_camel, protected_namespaces=()
+    )
+
+
+class PercentagesDTO(BaseModel):
+    # Pairs a job status with the (optional) percentages.
+    job_status: JobStatus
+    percentages: Optional[Percentages]
+
+    # Accept both snake_case field names and their camelCase aliases.
+    model_config = ConfigDict(
+        arbitrary_types_allowed=True,
+        populate_by_name=True,
+        alias_generator=to_camel,
+    )
+
+    @staticmethod
+    def from_dict(
+        job_status: JobStatus,
+        percentages_data: Optional[Dict],
+    ) -> 'PercentagesDTO':
+        """Create a PercentagesDTO from a dictionary of data.
+
+        :param job_status: status stored on the resulting DTO
+        :param percentages_data: raw percentages mapping; ``None`` or an empty
+            dict yields a DTO whose ``percentages`` is ``None``
+        :return: the populated DTO
+        """
+        percentages = PercentagesDTO._create_percentages(
+            percentages_data=percentages_data
+        )
+
+        # Build the DTO itself: `percentages_data` is the input dict (or None)
+        # and is not callable.
+        return PercentagesDTO(
+            job_status=job_status,
+            percentages=percentages,
+        )
+
+    @staticmethod
+    def _create_percentages(
+        percentages_data: Optional[Dict],
+    ) -> Optional[Percentages]:
+        """Create a specific percentages instance from a dictionary of data."""
+        # None and {} both mean "no percentages available".
+        if not percentages_data:
+            return None
+        return Percentages(**percentages_data)
diff --git a/api/app/models/model_dto.py b/api/app/models/model_dto.py
@@ -11,6 +11,7 @@
 from app.db.dao.reference_dataset_dao import ReferenceDataset
 from app.models.inferred_schema_dto import FieldType, SupportedTypes
 from app.models.job_status import JobStatus
+from app.models.metrics.percentages_dto import Percentages
 from app.models.utils import is_none, is_number, is_number_or_string, is_optional_float
 
 
@@ -216,6 +217,7 @@ class ModelOut(BaseModel):
     latest_current_uuid: Optional[UUID]
     latest_reference_job_status: JobStatus
     latest_current_job_status: JobStatus
+    percentages: Optional[Percentages]
 
     model_config = ConfigDict(
         populate_by_name=True, alias_generator=to_camel, protected_namespaces=()
@@ -226,6 +228,7 @@ def from_model(
         model: Model,
         latest_reference_dataset: Optional[ReferenceDataset] = None,
         latest_current_dataset: Optional[CurrentDataset] = None,
+        percentages: Optional[Percentages] = None,
     ):
         latest_reference_uuid = (
             latest_reference_dataset.uuid if latest_reference_dataset else None
@@ -264,4 +267,5 @@ def from_model(
             latest_current_uuid=latest_current_uuid,
             latest_reference_job_status=latest_reference_job_status,
             latest_current_job_status=latest_current_job_status,
+            percentages=percentages,
         )
diff --git a/api/app/routes/model_route.py b/api/app/routes/model_route.py
@@ -35,6 +35,10 @@ def get_all_models_paginated(
         def get_all_models():
             return model_service.get_all_models()
 
+        # Delegates to model_service.get_last_n_models_percentages, passing
+        # `n_models` through. FastAPI reads `n_models` from the query string
+        # because it is not part of the route path.
+        @router.get('/last_n', status_code=200, response_model=List[ModelOut])
+        def get_last_n_models(n_models: int):
+            return model_service.get_last_n_models_percentages(n_models)
+
         @router.post('', status_code=201, response_model=ModelOut)
         def create_model(model_in: ModelIn):
             model = model_service.create_model(model_in)

diff --git a/api/app/services/model_service.py b/api/app/services/model_service.py
@@ -85,6 +85,22 @@ def get_all_models(
             model_out_list.append(model_out)
         return model_out_list
 
+    def get_last_n_models_percentages(self, n_models) -> List[ModelOut]:
+        """Build ModelOut entries, including percentages, for the last models.
+
+        Each ``(model, metrics)`` row returned by the DAO is combined with the
+        model's latest reference and current datasets before conversion.
+        """
+        rows = self.model_dao.get_last_n_percentages(n_models)
+
+        def to_model_out(model, metrics) -> ModelOut:
+            reference, current = self.get_latest_datasets(model.uuid)
+            return ModelOut.from_model(
+                model=model,
+                latest_reference_dataset=reference,
+                latest_current_dataset=current,
+                percentages=metrics.percentages,
+            )
+
+        return [to_model_out(model, metrics) for model, metrics in rows]
+
     def get_all_models_paginated(
         self,
         params: Params = Params(),