From cda61dea6203f3181c4fb2cd5c62805d74b45f6d Mon Sep 17 00:00:00 2001 From: 100mi Date: Wed, 21 Sep 2022 18:43:08 +0530 Subject: [PATCH 1/4] feat: Add column order to the desxription of meta-data --- .DS_Store | Bin 0 -> 8196 bytes app/api/api_v1/routers/profile.py | 6 ++++-- app/models/description.py | 1 + app/utils/profile_segments.py | 9 +++++++-- 4 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2d94126f18cfde77f84e98b609c8f7961053c61d GIT binary patch literal 8196 zcmeHM&ubGw7=2?>j0iR&Vnvh%Ej_hT1o0xQB?a$22_D*5ZN#JrZIXK2i;4%qlO8+? zdiA1b!5?_hLl6D|J^B}T5ybb+jNO@R61`{zzk!*zo$vehy?vYPWVQgzLT_~*m;z8? z7pxxSFr-ObdZF#}GYQeK9?iA+rEBe0r;w==>VP_+4yXg_fI9H6IKVYqT4lt#@1i=Y z1M0wk>3}>R5_ZAVW9HC4Iyl%Z0I|$z+jv}+17hMa^_V$igw428qAN9ii(yH_?su@1;)n_o zGCC{L;~q8>)YHdxtfL=lIsSNShFXGXsN?C2XKyYawO&LSxrX5xA^(Z1+u}zFE9jsT za>HEsnnyz3JF__E?}4uiue%rx`B2&sz6!7PHaS~lZSr_CV#X1<|Am}T!};}xFAu+c zC!d2MdAS4Q@*-XCK9td=s!iOA^_&zZc4MKA?aA)+#z(6oi0Ayn;xX?sPtDI`A8k6( zqa%ZEoINcI&Ok<};rsJ57r%Y58VXstlF?a_o^$pU+97Pw2YxRxiyKt7L6pDd=uIQ` zM&x5`eyDZl%I5>qudUW!)a1&4wVTB~}O2fj{qnDc6>23*>Q2bCFPf`Xubj?9wDJb7)IANQ%tc?KpAwABH?md@S{t TIb?)Qe+XC`bWjKWssld&`9=%` literal 0 HcmV?d00001 diff --git a/app/api/api_v1/routers/profile.py b/app/api/api_v1/routers/profile.py index b7fcdcc..d30d354 100644 --- a/app/api/api_v1/routers/profile.py +++ b/app/api/api_v1/routers/profile.py @@ -88,7 +88,7 @@ async def profile_samples( ) # use `ProfileSegments` to get table part of pandas profiling - profile_segment = ProfileSegments(profile) + profile_segment = ProfileSegments(profile, columns=list(dataframe.columns)) samples = profile_segment.samples() return samples @@ -351,7 +351,9 @@ async def profile_description( ) # use `ProfileSegments` to get duplicates part of pandas profiling - profile_segment = ProfileSegments(profile) + profile_segment = ProfileSegments( + profile, columns_order=list(dataframe.columns) + ) description = profile_segment.description() return description diff --git a/app/models/description.py b/app/models/description.py index 025c66d..4ff1a16 100644 --- a/app/models/description.py +++ b/app/models/description.py @@ -25,3 +25,4 @@ class Description(BaseModel): package: Package samples: List[Sample] duplicates: Duplicates + columns_order: List[str] diff --git a/app/utils/profile_segments.py b/app/utils/profile_segments.py index 3c4ead4..1b172af 100644 --- a/app/utils/profile_segments.py +++ b/app/utils/profile_segments.py @@ -1,6 +1,6 @@ import datetime import json -from typing import List +from typing import List, Union import numpy as np from numpy import bool_ @@ -40,12 +40,13 @@ def json_conversion_objects(obj): class ProfileSegments: - def __init__(self, pandas_profile): + def __init__(self, pandas_profile, columns_order=None): """ Pass pandas profile of a dataset as argument """ self.pandas_profile = pandas_profile self.profile_description = pandas_profile.get_description() + self.col_order = columns_order def analysis(self) -> Analysis: return parse_obj_as( @@ -110,6 +111,9 @@ def duplicates(self) -> Duplicates: mod_duplicates = "None" return mod_duplicates + def columns_order(self) -> Union[List[str], None]: + return self.col_order + def description(self) -> Description: return { "analysis": self.analysis(), @@ -122,4 +126,5 @@ def description(self) -> Description: "package": self.package(), "samples": self.samples(), "duplicates": self.duplicates(), + "columns_order": self.columns_order(), } From c00fa4ae6f52ad10a54423524487b4bfd4679fa9 Mon Sep 17 00:00:00 2001 From: 100mi Date: Wed, 21 Sep 2022 18:48:50 +0530 Subject: [PATCH 2/4] fix: Typo error --- app/api/api_v1/routers/profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/api_v1/routers/profile.py b/app/api/api_v1/routers/profile.py index d30d354..b5b04e0 100644 --- a/app/api/api_v1/routers/profile.py +++ b/app/api/api_v1/routers/profile.py @@ -88,7 +88,7 @@ async def profile_samples( ) # use `ProfileSegments` to get table part of pandas profiling - profile_segment = ProfileSegments(profile, columns=list(dataframe.columns)) + profile_segment = ProfileSegments(profile) samples = profile_segment.samples() return samples From d463caa6f33c4b638ee3c28b571a9766774145d4 Mon Sep 17 00:00:00 2001 From: 100mi Date: Thu, 22 Sep 2022 11:18:44 +0530 Subject: [PATCH 3/4] fix: Add files to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b0ab456..3babbd8 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ task __pycache__/ *.py[cod] *$py.class +*.DS_Store +.DS_Store # C extensions *.so From f07dd58960d15d238ce71aa76b9e651f4376c70b Mon Sep 17 00:00:00 2001 From: shreeharsha-factly <55734346+shreeharsha-factly@users.noreply.github.com> Date: Thu, 22 Sep 2022 11:59:12 +0530 Subject: [PATCH 4/4] Delete .DS_Store --- .DS_Store | Bin 8196 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 2d94126f18cfde77f84e98b609c8f7961053c61d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 zcmeHM&ubGw7=2?>j0iR&Vnvh%Ej_hT1o0xQB?a$22_D*5ZN#JrZIXK2i;4%qlO8+? zdiA1b!5?_hLl6D|J^B}T5ybb+jNO@R61`{zzk!*zo$vehy?vYPWVQgzLT_~*m;z8? z7pxxSFr-ObdZF#}GYQeK9?iA+rEBe0r;w==>VP_+4yXg_fI9H6IKVYqT4lt#@1i=Y z1M0wk>3}>R5_ZAVW9HC4Iyl%Z0I|$z+jv}+17hMa^_V$igw428qAN9ii(yH_?su@1;)n_o zGCC{L;~q8>)YHdxtfL=lIsSNShFXGXsN?C2XKyYawO&LSxrX5xA^(Z1+u}zFE9jsT za>HEsnnyz3JF__E?}4uiue%rx`B2&sz6!7PHaS~lZSr_CV#X1<|Am}T!};}xFAu+c zC!d2MdAS4Q@*-XCK9td=s!iOA^_&zZc4MKA?aA)+#z(6oi0Ayn;xX?sPtDI`A8k6( zqa%ZEoINcI&Ok<};rsJ57r%Y58VXstlF?a_o^$pU+97Pw2YxRxiyKt7L6pDd=uIQ` zM&x5`eyDZl%I5>qudUW!)a1&4wVTB~}O2fj{qnDc6>23*>Q2bCFPf`Xubj?9wDJb7)IANQ%tc?KpAwABH?md@S{t TIb?)Qe+XC`bWjKWssld&`9=%`