Skip to content

Commit

Permalink
feat: add percentages dto and endpoint (#177)
Browse files Browse the repository at this point in the history
* feat: added last_n percentages endpoint and percentages DTO

* feat: merged main into branch

* feat: alembic migration

* feat: updated fastapi

* feat: fixed orderby and limit and alembic
  • Loading branch information
SteZamboni authored Oct 17, 2024
1 parent 9a66dc7 commit 5e61425
Show file tree
Hide file tree
Showing 16 changed files with 944 additions and 1,030 deletions.
14 changes: 10 additions & 4 deletions api/alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ def run_migrations_offline() -> None:
include_name=include_name
)

# Fall back to 'public' when target_metadata.schema is None: for Alembic, None means the default schema, which is 'public' on PostgreSQL.
target_schema = 'public' if target_metadata.schema is None else target_metadata.schema

with context.begin_transaction():
context.execute(f'create schema if not exists "{target_metadata.schema}";')
context.execute(f'set search_path to "{target_metadata.schema}"')
context.execute(f'create schema if not exists "{target_schema}";')
context.execute(f'set search_path to "{target_schema}"')
context.run_migrations()


Expand All @@ -99,9 +102,12 @@ def run_migrations_online() -> None:
include_name=include_name
)

# Fall back to 'public' when target_metadata.schema is None: for Alembic, None means the default schema, which is 'public' on PostgreSQL.
target_schema = 'public' if target_metadata.schema is None else target_metadata.schema

with context.begin_transaction():
context.execute(f'create schema if not exists "{target_metadata.schema}";')
context.execute(f'set search_path to "{target_metadata.schema}"')
context.execute(f'create schema if not exists "{target_schema}";')
context.execute(f'set search_path to "{target_schema}"')
context.run_migrations()


Expand Down
30 changes: 30 additions & 0 deletions api/alembic/versions/dccb82489f4d_add_percentage_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""add percentage column
Revision ID: dccb82489f4d
Revises: 6edab3f23907
Create Date: 2024-10-17 09:03:48.063883
"""
from typing import Sequence, Union, Text

from alembic import op
import sqlalchemy as sa
from app.db.tables.commons.json_encoded_dict import JSONEncodedDict

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises (see "Revision ID" / "Revises" in the module docstring).
revision: str = 'dccb82489f4d'
down_revision: Union[str, None] = '6edab3f23907'
# No branch labels and no dependencies on other migrations are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add the nullable ``PERCENTAGES`` column to ``current_dataset_metrics``."""
    # ### commands auto generated by Alembic - please adjust! ###
    # ``astext_type`` must be a SQLAlchemy type instance.  The bare ``Text``
    # name available in this module comes from ``typing`` (an alias of
    # ``str``), so ``Text()`` would pass an empty string; ``sa.Text()`` is the
    # intended SQLAlchemy type.
    op.add_column(
        'current_dataset_metrics',
        sa.Column(
            'PERCENTAGES',
            JSONEncodedDict(astext_type=sa.Text()),
            nullable=True,
        ),
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop the ``PERCENTAGES`` column from ``current_dataset_metrics``."""
    table_name = 'current_dataset_metrics'
    column_name = 'PERCENTAGES'
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(table_name, column_name)
    # ### end Alembic commands ###
34 changes: 33 additions & 1 deletion api/app/db/dao/model_dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
from fastapi_pagination import Page, Params
from fastapi_pagination.ext.sqlalchemy import paginate
import sqlalchemy
from sqlalchemy import asc, desc
from sqlalchemy import asc, desc, func
from sqlalchemy.future import select as future_select

from app.db.database import Database
from app.db.tables.current_dataset_metrics_table import CurrentDatasetMetrics
from app.db.tables.current_dataset_table import CurrentDataset
from app.db.tables.model_table import Model
from app.models.model_order import OrderType

Expand Down Expand Up @@ -57,6 +59,36 @@ def get_all(
with self.db.begin_session() as session:
return session.query(Model).where(Model.deleted.is_(False))

def get_last_n_percentages(self, n_models):
    """Return the most recently updated models with their latest current metrics.

    For each model, the current dataset with the greatest
    ``CurrentDataset.date`` is selected and joined with its metrics row.
    Because every join is an inner join, models without a current dataset or
    without metrics for it are not returned.  Soft-deleted models are
    excluded, consistently with the ``Model.deleted.is_(False)`` filter used
    by the other read queries of this DAO.

    :param n_models: maximum number of rows to return
    :return: list of ``(Model, CurrentDatasetMetrics)`` rows ordered by
        ``Model.updated_at`` descending
    """
    with self.db.begin_session() as session:
        # Newest current-dataset date for each model.
        latest_per_model = (
            session.query(
                CurrentDataset.model_uuid,
                func.max(CurrentDataset.date).label('maxdate'),
            )
            .group_by(CurrentDataset.model_uuid)
            .subquery()
        )
        return (
            session.query(Model, CurrentDatasetMetrics)
            .join(
                CurrentDataset,
                CurrentDataset.model_uuid == Model.uuid,
            )
            # Keep only the current dataset matching the newest date.
            .join(
                latest_per_model,
                (CurrentDataset.model_uuid == latest_per_model.c.model_uuid)
                & (CurrentDataset.date == latest_per_model.c.maxdate),
            )
            .join(
                CurrentDatasetMetrics,
                CurrentDatasetMetrics.current_uuid == CurrentDataset.uuid,
            )
            # Do not expose models that were soft-deleted.
            .filter(Model.deleted.is_(False))
            .order_by(Model.updated_at.desc())
            .limit(n_models)
            .all()
        )

def get_all_paginated(
self,
params: Params = Params(),
Expand Down
4 changes: 2 additions & 2 deletions api/app/db/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ class Reflected(DeferredReflection):

# https://github.com/sqlalchemy/alembic/discussions/1351
# If the schema is the default one, Alembic needs None; otherwise migrations are messed up
schema_name = (
fixed_schema = (
None
if get_config().db_config.db_schema == 'public'
else get_config().db_config.db_schema
)
BaseTable = declarative_base(
metadata=MetaData(schema=schema_name, naming_convention=naming_convention)
metadata=MetaData(schema=fixed_schema, naming_convention=naming_convention)
)


Expand Down
1 change: 1 addition & 0 deletions api/app/db/tables/current_dataset_metrics_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ class CurrentDatasetMetrics(Reflected, BaseTable, BaseDAO):
data_quality = Column('DATA_QUALITY', JSONEncodedDict, nullable=True)
drift = Column('DRIFT', JSONEncodedDict, nullable=True)
statistics = Column('STATISTICS', JSONEncodedDict, nullable=True)
percentages = Column('PERCENTAGES', JSONEncodedDict, nullable=True)
61 changes: 61 additions & 0 deletions api/app/models/metrics/percentages_dto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from typing import Dict, List, Optional

from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel

from app.models.job_status import JobStatus


class DetailPercentage(BaseModel):
    # Per-feature entry used in the ``details`` list of ``MetricPercentage``.
    # Name of the feature this entry refers to.
    feature_name: str
    # Score associated with the feature.
    # NOTE(review): presumably a percentage; the valid range is not visible here — confirm.
    score: float


class MetricPercentage(BaseModel):
    # Aggregated value for one metric.
    value: float
    # Optional per-feature breakdown.  The annotation is Optional so that the
    # ``None`` default matches the declared type: an explicit ``details=None``
    # now validates and serializing the default no longer mismatches the
    # schema.  Lists (including lists containing ``None``) are still accepted.
    details: Optional[List[Optional[DetailPercentage]]] = None


class Percentages(BaseModel):
    # camelCase aliases are generated for every field, while the snake_case
    # names remain accepted on input.  ``protected_namespaces`` is emptied so
    # that the ``model_quality`` field name is allowed.
    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        protected_namespaces=(),
    )

    # One ``MetricPercentage`` per metric family.
    data_quality: MetricPercentage
    model_quality: MetricPercentage
    drift: MetricPercentage


class PercentagesDTO(BaseModel):
    # Pairs a job status with the (optional) percentages payload.
    job_status: JobStatus
    # ``None`` when no percentages data is available.
    percentages: Optional[Percentages]

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        populate_by_name=True,
        alias_generator=to_camel,
    )

    @staticmethod
    def from_dict(
        job_status: JobStatus,
        percentages_data: Optional[Dict],
    ) -> 'PercentagesDTO':
        """Create a PercentagesDTO from a dictionary of data.

        :param job_status: status of the job the percentages belong to
        :param percentages_data: raw percentages dictionary; ``None`` or an
            empty dict results in ``percentages=None``
        :return: the populated DTO
        """
        percentages = PercentagesDTO._create_percentages(
            percentages_data=percentages_data
        )

        # Build the DTO itself; the previous code called ``percentages_data``
        # (the input dict), which raised ``TypeError`` at runtime.
        return PercentagesDTO(
            job_status=job_status,
            percentages=percentages,
        )

    @staticmethod
    def _create_percentages(
        percentages_data: Optional[Dict],
    ) -> Optional[Percentages]:
        """Create a specific percentages instance from a dictionary of data.

        Returns ``None`` when ``percentages_data`` is ``None`` or empty.
        """
        if not percentages_data:
            return None
        return Percentages(**percentages_data)
4 changes: 4 additions & 0 deletions api/app/models/model_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from app.db.dao.reference_dataset_dao import ReferenceDataset
from app.models.inferred_schema_dto import FieldType, SupportedTypes
from app.models.job_status import JobStatus
from app.models.metrics.percentages_dto import Percentages
from app.models.utils import is_none, is_number, is_number_or_string, is_optional_float


Expand Down Expand Up @@ -216,6 +217,7 @@ class ModelOut(BaseModel):
latest_current_uuid: Optional[UUID]
latest_reference_job_status: JobStatus
latest_current_job_status: JobStatus
percentages: Optional[Percentages]

model_config = ConfigDict(
populate_by_name=True, alias_generator=to_camel, protected_namespaces=()
Expand All @@ -226,6 +228,7 @@ def from_model(
model: Model,
latest_reference_dataset: Optional[ReferenceDataset] = None,
latest_current_dataset: Optional[CurrentDataset] = None,
percentages: Optional[Percentages] = None,
):
latest_reference_uuid = (
latest_reference_dataset.uuid if latest_reference_dataset else None
Expand Down Expand Up @@ -264,4 +267,5 @@ def from_model(
latest_current_uuid=latest_current_uuid,
latest_reference_job_status=latest_reference_job_status,
latest_current_job_status=latest_current_job_status,
percentages=percentages,
)
4 changes: 4 additions & 0 deletions api/app/routes/model_route.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def get_all_models_paginated(
def get_all_models():
return model_service.get_all_models()

@router.get('/last_n', status_code=200, response_model=List[ModelOut])
def get_last_n_models(n_models: int):
    # Delegate to the service layer and return its result unchanged;
    # ``n_models`` is forwarded unchanged to the service.
    latest_models = model_service.get_last_n_models_percentages(n_models)
    return latest_models

@router.post('', status_code=201, response_model=ModelOut)
def create_model(model_in: ModelIn):
model = model_service.create_model(model_in)
Expand Down
16 changes: 16 additions & 0 deletions api/app/services/model_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,22 @@ def get_all_models(
model_out_list.append(model_out)
return model_out_list

def get_last_n_models_percentages(self, n_models) -> List[ModelOut]:
    """Build one ``ModelOut`` per row returned by ``get_last_n_percentages``.

    Each row pairs a model with a metrics object; the model's latest
    reference/current datasets are looked up and the metrics' ``percentages``
    value is attached to the resulting ``ModelOut``.
    """

    def _to_model_out(model, metrics):
        # Resolve the latest datasets first, then assemble the output DTO.
        reference_dataset, current_dataset = self.get_latest_datasets(model.uuid)
        return ModelOut.from_model(
            model=model,
            latest_reference_dataset=reference_dataset,
            latest_current_dataset=current_dataset,
            percentages=metrics.percentages,
        )

    rows = self.model_dao.get_last_n_percentages(n_models)
    return [_to_model_out(model, metrics) for model, metrics in rows]

def get_all_models_paginated(
self,
params: Params = Params(),
Expand Down
Loading

0 comments on commit 5e61425

Please sign in to comment.