From 6fcc8903c9fd71145ed924e998360bb2df7023a8 Mon Sep 17 00:00:00 2001 From: venu-sambarapu-DS Date: Thu, 10 Oct 2024 17:21:17 +0530 Subject: [PATCH] Added error message in well explained manner --- app/core/config.py | 6 +++--- app/utils/general.py | 12 ++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/app/core/config.py b/app/core/config.py index 43d876e..07d8dee 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -323,7 +323,7 @@ class CustomExpectationsSettings(BaseSettings): "Index not in Column Names" ) INDEX_NOT_IN_COLUMN_NAMES_EXPECTATION_ERR_MSG: str = ( - "Column names should not have index as a column" + "Column names should not have 'index' as a column so please rename - {column}" ) NULL_DATETIME_VALUE_NAME: str = "Null date values Flag - {column}" NULL_DATETIME_VALUE_MSG: str = ( @@ -336,7 +336,7 @@ class CustomExpectationsSettings(BaseSettings): "Numeric values in specific pattern - {column}" ) NUMERIC_EXPECTATION_ERR_MSG: str = ( - "Numeric values should be in proper format both integer and float(roundoff to two decimal places)" + "Numeric values should be in proper format both integer and float(round-off to two decimal places)" ) NEGATIVE_NUMERIC_VALUES_PATTERN = re.compile(r"^-\d+(\.\d{1,})?$") @@ -350,7 +350,7 @@ class CustomExpectationsSettings(BaseSettings): COLUMN_NAMES_PATTERN = re.compile(r"^[a-z]+(?:_[a-z]+)*$") COLUMN_NAMES_EXPECTATION_NAME: str = "Column names in specific pattern" COLUMN_NAMES_EXPECTATION_ERR_MSG: str = ( - "Column names should be in lower case and separated by underscore - {column}" + "Column names should be in lower case and separated by underscore - Example 'Sub Category' column should be written as 'sub_category' The improper columns list is: {column}" ) TRAIL_OR_LEAD_WHITESPACE_PATTERN = re.compile(r"^\s+.*|.*\s+$") diff --git a/app/utils/general.py b/app/utils/general.py index e209d3b..da5a956 100644 --- a/app/utils/general.py +++ b/app/utils/general.py @@ -337,7 +337,15 @@ async def column_names_expectation_suite(dataset, result_format): "cleaning_pdf_link": settings.DATA_CLEANING_GUIDE_LINK, "expectation_name": custom_settings.COLUMN_NAMES_EXPECTATION_NAME, "expectation_error_message": custom_settings.COLUMN_NAMES_EXPECTATION_ERR_MSG.format( - column=dataset.columns.tolist() + column=list( + set(dataset.columns.tolist()) + - set( + [ + i.lower().replace(" ", "_") + for i in dataset.columns.tolist() + ] + ) + ) ), } response = { @@ -367,7 +375,7 @@ async def index_not_in_columns_expectation_suite(dataset, result_format): "cleaning_pdf_link": settings.DATA_CLEANING_GUIDE_LINK, "expectation_name": custom_settings.INDEX_NOT_IN_COLUMN_NAMES_EXPECTATION_NAME, "expectation_error_message": custom_settings.INDEX_NOT_IN_COLUMN_NAMES_EXPECTATION_ERR_MSG.format( - column=dataset.columns.tolist() + column=[i for i in dataset.columns.tolist() if i == "index"] ), } response = {