From f994b45ed980e1404f5e31e90a9aebe70990f1b4 Mon Sep 17 00:00:00 2001 From: 100mi Date: Thu, 17 Nov 2022 15:11:19 +0530 Subject: [PATCH 1/2] fix: Remove unnecessary profiling attributes from description --- app/core/config.py | 15 +++++++-- app/models/alerts.py | 5 ++- app/models/analysis.py | 3 ++ app/models/correlations.py | 3 ++ app/models/description.py | 17 +++++----- app/models/missing.py | 3 ++ app/models/package.py | 3 ++ app/models/sample.py | 3 ++ app/models/table.py | 17 +++++----- app/models/types.py | 3 ++ app/models/variables.py | 59 +++++++++++++++++++++++++++++++++-- app/utils/profile_segments.py | 12 ++++++- 12 files changed, 119 insertions(+), 24 deletions(-) diff --git a/app/core/config.py b/app/core/config.py index 1bb92b8..fae7965 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -1,3 +1,5 @@ +from typing import List + from pydantic import BaseSettings @@ -10,9 +12,16 @@ class Settings(BaseSettings): EXAMPLE_URL: str = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv" # noqa: E501 # CORS PARAMS - CORS_ORIGINS: list = ["*"] - CORS_METHODS: list = ["*"] - CORS_HEADERS: list = ["*"] + CORS_ORIGINS: List[str] = ["*"] + CORS_METHODS: List[str] = ["*"] + CORS_HEADERS: List[str] = ["*"] + + # MODEL PARAMS + # Constraint for Column names + COLUMN_NAME_REGEX_PATTERN = r"[\w\s]*" + + # PROFILE SEGMENTS + SAMPLE_DATA_RENDERER: List[str] = ["head"] class Config: env_file = ".env" diff --git a/app/models/alerts.py b/app/models/alerts.py index 162d8b5..e336ee3 100644 --- a/app/models/alerts.py +++ b/app/models/alerts.py @@ -2,7 +2,6 @@ from typing import List -# from pandas_profiling.model.alerts import Alert from pydantic import BaseModel @@ -10,5 +9,5 @@ class Alerts(BaseModel): # __root__ : List[Alert] __root__: List[str] - # class Config: - # arbitrary_types_allowed = True + class Config: + underscore_attrs_are_private = True diff --git a/app/models/analysis.py b/app/models/analysis.py index 6a8248b..65d1f2f 100644
--- a/app/models/analysis.py +++ b/app/models/analysis.py @@ -8,3 +8,6 @@ class Analysis(BaseModel): date_start: datetime date_end: datetime duration: timedelta + + class Config: + underscore_attrs_are_private = True diff --git a/app/models/correlations.py b/app/models/correlations.py index d66572e..d608c54 100644 --- a/app/models/correlations.py +++ b/app/models/correlations.py @@ -11,3 +11,6 @@ class Correlations(BaseModel): kendall: Optional[Union[Json, Dict]] cramers: Optional[Union[Json, Dict]] phi_k: Optional[Union[Json, Dict]] + + class Config: + underscore_attrs_are_private = True diff --git a/app/models/description.py b/app/models/description.py index 4ff1a16..33800df 100644 --- a/app/models/description.py +++ b/app/models/description.py @@ -15,14 +15,17 @@ class Description(BaseModel): - analysis: Analysis + _analysis: Analysis table: Table variables: Variables - scatter: Scatter - correlations: Correlations - missing: Missing - alerts: Alerts - package: Package + _scatter: Scatter + _correlations: Correlations + _missing: Missing + _alerts: Alerts + _package: Package samples: List[Sample] - duplicates: Duplicates + _duplicates: Duplicates columns_order: List[str] + + class Config: + underscore_attrs_are_private = True diff --git a/app/models/missing.py b/app/models/missing.py index c598ff7..2711ff9 100644 --- a/app/models/missing.py +++ b/app/models/missing.py @@ -34,3 +34,6 @@ class Missing(BaseModel): matrix: Optional[Matrix] = None heatmap: Optional[Heatmap] = None dendrogram: Optional[Dendrogram] = None + + class Config: + underscore_attrs_are_private = True diff --git a/app/models/package.py b/app/models/package.py index 3e80b0c..7d2599d 100644 --- a/app/models/package.py +++ b/app/models/package.py @@ -6,3 +6,6 @@ class Package(BaseModel): pandas_profiling_version: str pandas_profiling_config: str + + class Config: + underscore_attrs_are_private = True diff --git a/app/models/sample.py b/app/models/sample.py index dec1891..05c51d9 100644 --- 
a/app/models/sample.py +++ b/app/models/sample.py @@ -9,3 +9,6 @@ class Sample(BaseModel): data: Json name: str caption: Optional[str] = None + + class Config: + underscore_attrs_are_private = True diff --git a/app/models/table.py b/app/models/table.py index b6e9bff..8345fc6 100644 --- a/app/models/table.py +++ b/app/models/table.py @@ -6,10 +6,13 @@ class Table(BaseModel): n: int n_var: int - memory_size: int - record_size: float - n_cells_missing: int - n_vars_with_missing: int - n_vars_all_missing: int - p_cells_missing: float - types: Types + _memory_size: int + _record_size: float + _n_cells_missing: int + _n_vars_with_missing: int + _n_vars_all_missing: int + _p_cells_missing: float + _types: Types + + class Config: + underscore_attrs_are_private = True diff --git a/app/models/types.py b/app/models/types.py index 87e5efe..49fa25a 100644 --- a/app/models/types.py +++ b/app/models/types.py @@ -7,3 +7,6 @@ class Types(BaseModel): Numeric: Optional[int] Categorical: Optional[int] Unsupported: Optional[int] + + class Config: + underscore_attrs_are_private = True diff --git a/app/models/variables.py b/app/models/variables.py index f6e5c71..ec7357c 100644 --- a/app/models/variables.py +++ b/app/models/variables.py @@ -1,7 +1,60 @@ -from typing import Dict +from typing import Any, Dict, List, Optional, Union -from pydantic import BaseModel +from pydantic import BaseModel, Field, constr + +from app.core.config import Settings + +settings = Settings() +VARIABLE_COLUMN_CONSTRAINT = constr(regex=settings.COLUMN_NAME_REGEX_PATTERN) + + +class VariableProperties(BaseModel): + n_distinct: Optional[int] + _p_distinct: Optional[float] + _is_unique: Optional[bool] + n_unique: Optional[int] + _p_unique: Optional[float] + type: Optional[str] + _hashable: Optional[bool] + _ordering: Optional[bool] + _n_missing: Optional[int] + _n: Optional[int] + _p_missing: Optional[int] + count: Optional[int] + _memory_size: Optional[int] + _n_negative: Optional[int] + _p_negative: 
Optional[int] + _n_infinite: Optional[int] + _n_zeros: Optional[int] + _mean: Optional[float] + _std: Optional[float] + _variance: Optional[float] + _min: Optional[int] + _max: Optional[float] + _kurtosis: Optional[float] + _skewness: Optional[float] + _sum: Optional[float] + _mad: Optional[float] + _range: Optional[float] + _field_5_: Optional[Union[float, None]] = Field(None, alias="5%") + _field_25_: Optional[Union[float, None]] = Field(None, alias="25%") + _field_50_: Optional[Union[float, None]] = Field(None, alias="50%") + _field_75_: Optional[Union[int, None]] = Field(None, alias="75%") + _field_95_: Optional[Union[float, None]] = Field(None, alias="95%") + _iqr: Optional[float] + _cv: Optional[float] + _p_zeros: Optional[float] + _p_infinite: Optional[int] + _monotonic_increase: Optional[bool] + _monotonic_decrease: Optional[bool] + _monotonic_increase_strict: Optional[bool] + _monotonic_decrease_strict: Optional[bool] + _monotonic: Optional[int] + _histogram: Optional[List[Any]] + + class Config: + underscore_attrs_are_private = True class Variables(BaseModel): - __root__: Dict + __root__: Dict[VARIABLE_COLUMN_CONSTRAINT, VariableProperties] diff --git a/app/utils/profile_segments.py b/app/utils/profile_segments.py index 1b172af..ff52019 100644 --- a/app/utils/profile_segments.py +++ b/app/utils/profile_segments.py @@ -7,6 +7,7 @@ from pandas import DataFrame from pydantic import parse_obj_as +from app.core.config import Settings from app.models.alerts import Alerts from app.models.analysis import Analysis from app.models.correlations import Correlations @@ -19,6 +20,8 @@ from app.models.table import Table from app.models.variables import Variables +settings = Settings() + def json_conversion_objects(obj): """Fix improper objects while creating json @@ -100,7 +103,14 @@ def samples(self) -> List[Sample]: samples = self.pandas_profile.get_sample() for sample in samples: sample.data = sample.data.to_json() - return samples + print(type(samples)) + # * 'head' 
and 'tail' are returned as dataset sample + # * use env variable to select `head` or `tail` or `both` + return [ + sample + for sample in samples + if sample.id in settings.SAMPLE_DATA_RENDERER + ] def duplicates(self) -> Duplicates: # get duplicates From 5a0038243bc7cdfd4bf1ca25f7299fd2f475bde1 Mon Sep 17 00:00:00 2001 From: 100mi Date: Thu, 17 Nov 2022 15:18:07 +0530 Subject: [PATCH 2/2] fix: Update flake8 repo in .pre-commit-config.yaml --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f1e6bcf..aebb248 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: hooks: - id: black language_version: python3 -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/pycqa/flake8 rev: 4.0.1 hooks: - id: flake8