Skip to content

Commit

Permalink
Changed how the standard dataset is read: the Google Sheet CSV URL is now built from named parts
Browse files Browse the repository at this point in the history
  • Loading branch information
venu-sambarapu-DS committed Jun 5, 2024
1 parent e384308 commit 4e81cd9
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 13 deletions.
15 changes: 10 additions & 5 deletions app/api/api_v1/routers/dictionary.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import io

import pandas as pd
import requests
from fastapi import APIRouter, HTTPException, status
from fastapi.encoders import jsonable_encoder
import io
import requests
from fastapi.responses import JSONResponse

from app.core.config import CORE_FOLDER, Settings
Expand All @@ -15,9 +16,13 @@


# Download the "master_dictionaries" sheet of a Google Sheets workbook as CSV
# and parse it into the DataFrame `data`. These are module-level statements,
# so the HTTP request is issued when this module is imported.
# NOTE(review): this span appears to be a rendered diff without +/- markers,
# so the pre-change and post-change versions of some lines are both present
# (the two `.get(...)` calls and the two `pd.read_csv(...)` calls) -- confirm
# against the actual file before relying on it.
g_sheet_session = requests.Session()
# Single-literal form of the export URL; identical to `url_name` built below.
g_sheet_response = g_sheet_session.get("https://docs.google.com/spreadsheets/d/1NEsFJGr5IHsrIakGgeNFUvz5zpLOadh_vDH7Apqmv9E/gviz/tq?tqx=out:csv&sheet=master_dictionaries")
# The same URL split into base link, workbook id, and the CSV-export query
# that selects the sheet by name.
common_g_sheet_link_format = "https://docs.google.com/spreadsheets/d/"
g_sheet_id = "1NEsFJGr5IHsrIakGgeNFUvz5zpLOadh_vDH7Apqmv9E"
download_sheet_name = "/gviz/tq?tqx=out:csv&sheet=master_dictionaries"
url_name = common_g_sheet_link_format + g_sheet_id + download_sheet_name
g_sheet_response = g_sheet_session.get(url_name)
# No status check or timeout is applied here; the response body is decoded
# as UTF-8 as-is and handed to pandas through an in-memory text buffer.
g_sheet_bytes_data = g_sheet_response.content
data = pd.read_csv(io.StringIO(g_sheet_bytes_data.decode('utf-8')))
data = pd.read_csv(io.StringIO(g_sheet_bytes_data.decode("utf-8")))

standard_data_values = data.copy()
standard_data_values.rename(
Expand All @@ -28,7 +33,7 @@
"psu_companies": "psu",
"standard_district_name": "district",
"standard_states": "state",
"insurance_standard_names": "insurance_companies"
"insurance_standard_names": "insurance_companies",
},
inplace=True,
)
Expand Down
1 change: 1 addition & 0 deletions app/utils/airline.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import great_expectations as ge
from fastapi.encoders import jsonable_encoder

from app.api.api_v1.routers.dictionary import standard_data_values
from app.core.config import AirlineSettings, Settings
from app.utils.column_mapping import find_airline_name_columns
Expand Down
1 change: 1 addition & 0 deletions app/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
from io import BytesIO
from typing import Union

# from app.api.api_v1.routers.dictionary import data as dictionary_data
import great_expectations as ge
import pandas as pd
Expand Down
8 changes: 3 additions & 5 deletions app/utils/geography.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import asyncio
from collections import ChainMap
from app.api.api_v1.routers.dictionary import standard_data_values

import great_expectations as ge
from fastapi.encoders import jsonable_encoder

from app.api.api_v1.routers.dictionary import standard_data_values
from app.core.config import GeographySettings, Settings
from app.utils.column_mapping import find_geography_columns
from app.utils.common import (
modify_values_to_be_in_set,
read_dataset,
)
from app.utils.common import modify_values_to_be_in_set, read_dataset

settings = Settings()
geograhy_setting = GeographySettings()
Expand Down
5 changes: 4 additions & 1 deletion app/utils/insurance.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import great_expectations as ge
from fastapi.encoders import jsonable_encoder

from app.api.api_v1.routers.dictionary import standard_data_values
from app.core.config import InsuranceCompanySettings, Settings
from app.utils.column_mapping import find_insurance_company_columns
Expand All @@ -16,7 +17,9 @@ async def modify_insurance_company_name_expectation_suite(
insurance_company_settings.INSURANCE_COMPANY_NAME_EXPECTATION
)

insurance_company_names_dataset = standard_data_values[["insurance_companies"]]
insurance_company_names_dataset = standard_data_values[
["insurance_companies"]
]
insurance_company_names_list = insurance_company_names_dataset[
"insurance_companies"
].tolist()
Expand Down
6 changes: 4 additions & 2 deletions app/utils/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
import great_expectations as ge
from fastapi.encoders import jsonable_encoder

from app.api.api_v1.routers.dictionary import standard_data_values
from app.core.config import MetadataSettings, Settings
from app.utils.column_mapping import find_metadata_columns
from app.utils.common import (
modify_values_to_be_in_set,
modify_values_to_match_regex_list,
read_dataset,
)
from app.api.api_v1.routers.dictionary import standard_data_values
from app.utils.general import general_metadata_expectation_suite
from app.utils.tags import tags_expectation_suite
from app.utils.unit import unit_expectation_suite
Expand Down Expand Up @@ -209,7 +209,9 @@ async def modify_frequency_of_update_expectation_suite(
meta_data_setting.FREQUENCY_OF_UPDATE_EXPECTATION
)

frequency_of_update_dataset = standard_data_values[["frequency_of_update"]].dropna()
frequency_of_update_dataset = standard_data_values[
["frequency_of_update"]
].dropna()
frequency_of_update_list = frequency_of_update_dataset[
"frequency_of_update"
].tolist()
Expand Down

0 comments on commit 4e81cd9

Please sign in to comment.