Skip to content

Commit

Permalink
Fix: Additional Granular values
Browse files: browse the repository at this point in the history
  • Loading branch information
HemanthM005 committed Feb 19, 2024
1 parent c590509 commit cb8b97b
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 5 deletions.
Binary file modified .DS_Store
Binary file not shown.
17 changes: 17 additions & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,23 @@ class GeographySettings(BaseSettings):
}


class OtherSettings(BaseSettings):
    """Keyword regexes and display labels for extra granularity columns."""

    # Regex fragments used to recognise column names for each category.
    # NOTE(review): "[_names]{0,}" is a character class, so it accepts any
    # run of the characters "_", "n", "a", "m", "e", "s" (including none),
    # not only the literal suffix "_names" -- confirm this is intended.
    AIRLINE_KEYWORD = ".*airline[s]*[_names]{0,}"
    AIRPORT_KEYWORD = ".*airport[s]*[_names]{0,}"
    # Matches any name containing "language".
    LANGUAGE_KEYWORD = ".*language.*"
    CROPS_KEYWORD = ".*crop[s]*[_names]{0,}"
    # Matches any name containing "gender".
    GENDER_KEYWORD = ".*gender.*"

    # Category key -> label emitted in the "granularity" string.
    GRANULARITY_REPRESENTATION = {
        "airline": "Airline",
        "airport": "Airport",
        "language": "Language",
        "crop": "Crop",
        "gender": "Gender"
    }


class UnitSettings(BaseSettings):

UNIT_KEYWORD = "unit"
Expand Down
47 changes: 44 additions & 3 deletions app/utils/columns_mapping.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import re
from itertools import chain

from fastapi.logger import logger

from app.core.config import (
DateTimeSettings,
GeographySettings,
NoteSettings,
UnitSettings,
OtherSettings,
)

datetime_settings = DateTimeSettings()
geography_settings = GeographySettings()
unit_settings = UnitSettings()
note_settings = NoteSettings()
other_settings = OtherSettings()


def extract_pattern_from_columns(
Expand All @@ -24,6 +24,46 @@ def extract_pattern_from_columns(
return matched_columns, columns.difference(matched_columns)


async def find_other_granular_columns(columns: set):
    """Group column names by the extra granularity category they denote.

    Categories are tried in a fixed order (airline, airport, language,
    crop, gender). Each category is matched only against the columns left
    over by the categories before it, so a column is reported under at
    most one key.

    Args:
        columns: Set of column names to classify.

    Returns:
        A dict with the keys ``"airline"``, ``"airport"``, ``"language"``,
        ``"crop"`` and ``"gender"``; each value is whatever
        ``extract_pattern_from_columns`` reports as matched for that
        category's pattern.
    """
    # One ordered table of (result key, keyword regex) keeps each key tied
    # to its own keyword; in particular "gender" must be built from
    # GENDER_KEYWORD, not from another category's keyword.
    keyword_by_category = (
        ("airline", other_settings.AIRLINE_KEYWORD),
        ("airport", other_settings.AIRPORT_KEYWORD),
        ("language", other_settings.LANGUAGE_KEYWORD),
        ("crop", other_settings.CROPS_KEYWORD),
        ("gender", other_settings.GENDER_KEYWORD),
    )

    matched_by_category = {}
    remaining = columns
    for category, keyword in keyword_by_category:
        pattern = re.compile(r".*({})".format(keyword))
        # The helper hands back the matches plus the unmatched remainder,
        # which becomes the input for the next category.
        matched_by_category[category], remaining = (
            extract_pattern_from_columns(remaining, pattern)
        )

    return matched_by_category


async def find_datetime_columns(columns: set):
non_cal_year_pattern = re.compile(
r".*({}|{})".format(
Expand Down Expand Up @@ -64,7 +104,7 @@ async def find_datetime_columns(columns: set):
columns, month_pattern
)
date_columns, columns = extract_pattern_from_columns(columns, date_pattern)
logger.info(f"date_columns: {date_columns}")

# filter out `as_on_date` from date columns
date_columns = {
col for col in date_columns if not as_on_date_pattern.match(col)
Expand Down Expand Up @@ -145,4 +185,5 @@ async def find_mapped_columns(columns):
list(chain.from_iterable(mapped_columns.values()))
)
)

return {**mapped_columns, "unmapped": not_mapped_columns}
16 changes: 15 additions & 1 deletion app/utils/granularity.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
from app.core.config import DateTimeSettings, GeographySettings
from app.core.config import DateTimeSettings, GeographySettings, OtherSettings
from app.utils.columns_mapping import (
find_datetime_columns,
find_geography_columns,
find_other_granular_columns,
)
from app.utils.common import get_key_from_dict

datetime_settings = DateTimeSettings()
geographic_settings = GeographySettings()
other_settings = OtherSettings()


async def get_granularity(columns):
datetime_columns = await find_datetime_columns(columns)
geographic_columns = await find_geography_columns(columns)
other_granular_columns = await find_other_granular_columns(columns)

datetime_columns = {
key: value for key, value in datetime_columns.items() if value
}
geographic_columns = {
key: value for key, value in geographic_columns.items() if value
}
other_granular_columns = {
key: value for key, value in other_granular_columns.items() if value
}

sorted_datetime_columns = sorted(
datetime_columns.items(),
Expand Down Expand Up @@ -48,4 +54,12 @@ async def get_granularity(columns):
]
)

if len(other_granular_columns) > 0:
granularity_values.extend(
[
other_settings.GRANULARITY_REPRESENTATION[key]
for key in other_granular_columns.keys()
]
)

return {"granularity": ", ".join(granularity_values)}
2 changes: 1 addition & 1 deletion app/utils/temporal_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,5 @@ async def get_temporal_coverage(dataset, mapped_columns: dict):
# temporal_coverage = temporal_coverage_representation(
# year_in_sequence, year_mapping
# )
logger.warning(f"Temporal Coverage: {temporal_coverage}")

return {"temporal_coverage": temporal_coverage}

0 comments on commit cb8b97b

Please sign in to comment.