From 4e81cd978f7fc87dccecce60db3181796ab40e0e Mon Sep 17 00:00:00 2001 From: venu-sambarapu-DS Date: Wed, 5 Jun 2024 16:39:24 +0530 Subject: [PATCH] Modified the way of reading the standard dataset --- app/api/api_v1/routers/dictionary.py | 15 ++++++++++----- app/utils/airline.py | 1 + app/utils/common.py | 1 + app/utils/geography.py | 8 +++----- app/utils/insurance.py | 5 ++++- app/utils/metadata.py | 6 ++++-- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/app/api/api_v1/routers/dictionary.py b/app/api/api_v1/routers/dictionary.py index f962ee5..85e876d 100644 --- a/app/api/api_v1/routers/dictionary.py +++ b/app/api/api_v1/routers/dictionary.py @@ -1,8 +1,9 @@ +import io + import pandas as pd +import requests from fastapi import APIRouter, HTTPException, status from fastapi.encoders import jsonable_encoder -import io -import requests from fastapi.responses import JSONResponse from app.core.config import CORE_FOLDER, Settings @@ -15,9 +16,13 @@ g_sheet_session = requests.Session() -g_sheet_response = g_sheet_session.get("https://docs.google.com/spreadsheets/d/1NEsFJGr5IHsrIakGgeNFUvz5zpLOadh_vDH7Apqmv9E/gviz/tq?tqx=out:csv&sheet=master_dictionaries") +common_g_sheet_link_format = "https://docs.google.com/spreadsheets/d/" +g_sheet_id = "1NEsFJGr5IHsrIakGgeNFUvz5zpLOadh_vDH7Apqmv9E" +download_sheet_name = "/gviz/tq?tqx=out:csv&sheet=master_dictionaries" +url_name = common_g_sheet_link_format + g_sheet_id + download_sheet_name +g_sheet_response = g_sheet_session.get(url_name) g_sheet_bytes_data = g_sheet_response.content -data = pd.read_csv(io.StringIO(g_sheet_bytes_data.decode('utf-8'))) +data = pd.read_csv(io.StringIO(g_sheet_bytes_data.decode("utf-8"))) standard_data_values = data.copy() standard_data_values.rename( @@ -28,7 +33,7 @@ "psu_companies": "psu", "standard_district_name": "district", "standard_states": "state", - "insurance_standard_names": "insurance_companies" + "insurance_standard_names": "insurance_companies", }, inplace=True, ) diff --git a/app/utils/airline.py b/app/utils/airline.py index be39bd3..5627303 100644 --- a/app/utils/airline.py +++ b/app/utils/airline.py @@ -1,5 +1,6 @@ import great_expectations as ge from fastapi.encoders import jsonable_encoder + from app.api.api_v1.routers.dictionary import standard_data_values from app.core.config import AirlineSettings, Settings from app.utils.column_mapping import find_airline_name_columns diff --git a/app/utils/common.py b/app/utils/common.py index 28b4225..fd313c9 100644 --- a/app/utils/common.py +++ b/app/utils/common.py @@ -2,6 +2,7 @@ import re from io import BytesIO from typing import Union + # from app.api.api_v1.routers.dictionary import data as dictionary_data import great_expectations as ge import pandas as pd diff --git a/app/utils/geography.py b/app/utils/geography.py index e876320..3b97368 100644 --- a/app/utils/geography.py +++ b/app/utils/geography.py @@ -1,15 +1,13 @@ import asyncio from collections import ChainMap -from app.api.api_v1.routers.dictionary import standard_data_values + import great_expectations as ge from fastapi.encoders import jsonable_encoder +from app.api.api_v1.routers.dictionary import standard_data_values from app.core.config import GeographySettings, Settings from app.utils.column_mapping import find_geography_columns -from app.utils.common import ( - modify_values_to_be_in_set, - read_dataset, -) +from app.utils.common import modify_values_to_be_in_set, read_dataset settings = Settings() geograhy_setting = GeographySettings() diff --git a/app/utils/insurance.py b/app/utils/insurance.py index 93d64d3..c87fb03 100644 --- a/app/utils/insurance.py +++ b/app/utils/insurance.py @@ -1,5 +1,6 @@ import great_expectations as ge from fastapi.encoders import jsonable_encoder + from app.api.api_v1.routers.dictionary import standard_data_values from app.core.config import InsuranceCompanySettings, Settings from app.utils.column_mapping import find_insurance_company_columns @@ -16,7 +17,9 @@ async def modify_insurance_company_name_expectation_suite( insurance_company_settings.INSURANCE_COMPANY_NAME_EXPECTATION ) - insurance_company_names_dataset = standard_data_values[["insurance_companies"]] + insurance_company_names_dataset = standard_data_values[ + ["insurance_companies"] + ] insurance_company_names_list = insurance_company_names_dataset[ "insurance_companies" ].tolist() diff --git a/app/utils/metadata.py b/app/utils/metadata.py index 1c0745c..ed868cd 100644 --- a/app/utils/metadata.py +++ b/app/utils/metadata.py @@ -4,6 +4,7 @@ import great_expectations as ge from fastapi.encoders import jsonable_encoder +from app.api.api_v1.routers.dictionary import standard_data_values from app.core.config import MetadataSettings, Settings from app.utils.column_mapping import find_metadata_columns from app.utils.common import ( @@ -11,7 +12,6 @@ modify_values_to_match_regex_list, read_dataset, ) -from app.api.api_v1.routers.dictionary import standard_data_values from app.utils.general import general_metadata_expectation_suite from app.utils.tags import tags_expectation_suite from app.utils.unit import unit_expectation_suite @@ -209,7 +209,9 @@ async def modify_frequency_of_update_expectation_suite( meta_data_setting.FREQUENCY_OF_UPDATE_EXPECTATION ) - frequency_of_update_dataset = standard_data_values[["frequency_of_update"]].dropna() + frequency_of_update_dataset = standard_data_values[ + ["frequency_of_update"] + ].dropna() frequency_of_update_list = frequency_of_update_dataset[ "frequency_of_update" ].tolist()