Skip to content

Commit

Permalink
Fixed import path for default version sets, output of unittest output…
Browse files Browse the repository at this point in the history
…ter and null checks, deprecates support for python 3.8. (#107)

Signed-off-by: Varun Mittal <[email protected]>
  • Loading branch information
varunmittal91 authored Dec 19, 2023
1 parent 187d745 commit d211632
Show file tree
Hide file tree
Showing 20 changed files with 128 additions and 110 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/coverage.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
name: Coverage

on:
push:
branches:
Expand All @@ -10,14 +9,15 @@ on:
branches:
- main
- dev

jobs:
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
name: Lint

on:
push:
pull_request:
branches:
- main
- dev

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ on:
- main
- dev
- issue/**

jobs:
validate_focus:
runs-on: ubuntu-latest
Expand All @@ -14,8 +13,10 @@ jobs:
steps:
- name: Check out repository code
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@ on:
tags:
- 'v\d\.\d\.\d'
- 'v\d\.\d\.\d-(dev|rc)\d'

jobs:
publish:
permissions:
id-token: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: 3.11
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Install dependencies
run: |
find -type l -exec bash -c 'ln -f "$(readlink -m "$0")" "$0"' {} \;
poetry build
poetry build --format=sdist
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
5 changes: 1 addition & 4 deletions .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
name: Unittest

on:
push:
branches:
Expand All @@ -10,14 +9,12 @@ on:
branches:
- main
- dev

jobs:
test:
runs-on: ${{ matrix.os }}

strategy:
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
os: [ windows-latest, ubuntu-latest, macos-latest ]
steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ tbd

### Prerequisites

- Python 3.8+
- Python 3.9+
- Poetry (Package & Dependency Manager)

### Installation
Expand Down
5 changes: 5 additions & 0 deletions focus_validator/config_objects/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,8 @@ def generate_check_friendly_name(check, column_id):
return f"{column_id} does not allow null values."
elif isinstance(check, DataTypeCheck):
return f"{column_id} requires values of type {check.data_type.value}."
elif isinstance(check, SQLQueryCheck):
sql_query = " ".join([word.strip() for word in check.sql_query.split()])
return f"{column_id} requires values that return true when evaluated by the following SQL query: {sql_query}"
else:
raise NotImplementedError(f"Check {check} not implemented.")
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __generate_pandera_check__(rule: Rule, check_id):
)
elif isinstance(check, AllowNullsCheck):
return pa.Check.check_not_null(
error=error_string, ignore_na=False, allow_nulls=check.allow_nulls
error=error_string, ignore_na=check.allow_nulls
)
else:
raise FocusNotImplementedError(
Expand Down
58 changes: 31 additions & 27 deletions focus_validator/config_objects/rule.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os
from typing import Optional, Union
from typing import Annotated, Optional, Union

import yaml
from pydantic import BaseModel, ConfigDict, model_validator
from pydantic import BaseModel, ConfigDict, Field, field_validator
from pydantic_core.core_schema import ValidationInfo

from focus_validator.config_objects.common import (
SIMPLE_CHECKS,
Expand Down Expand Up @@ -33,44 +34,47 @@ class Rule(BaseModel):
SIMPLE_CHECKS, AllowNullsCheck, ValueInCheck, DataTypeCheck, SQLQueryCheck
]

check_friendly_name: Optional[
str
check_friendly_name: Annotated[
Optional[str], Field(validate_default=True)
] = None # auto generated or else can be overwritten
check_type_friendly_name: Optional[str] = None
check_type_friendly_name: Annotated[
Optional[str], Field(validate_default=True)
] = None

model_config = ConfigDict(
extra="forbid", # prevents config from containing any undesirable keys
frozen=True, # prevents any modification to any attribute once loaded from config
)

# @root_validator
@model_validator(mode="before")
@classmethod
def root_val(cls, values):
"""
Root validator that checks for all options passed in the config and generate missing options.
"""
if values is None:
values = {}

check = values.get("check")
check_friendly_name = values.get("check_friendly_name")
column_id = values.get("column_id")
if check is not None:
@field_validator("check_friendly_name")
def validate_or_generate_check_friendly_name(
cls, check_friendly_name, validation_info: ValidationInfo
):
values = validation_info.data
if (
check_friendly_name is None
and values.get("check") is not None
and values.get("column_id") is not None
):
check_friendly_name = generate_check_friendly_name(
check=values["check"], column_id=values["column_id"]
)
return check_friendly_name

@field_validator("check_type_friendly_name")
def validate_or_generate_check_type_friendly_name(
cls, check_type_friendly_name, validation_info: ValidationInfo
):
values = validation_info.data
if values.get("check") is not None and values.get("column_id") is not None:
check = values.get("check")
if isinstance(check, str):
check_type_friendly_name = "".join(
[word.title() for word in check.split("_")]
)
else:
check_type_friendly_name = check.__class__.__name__
values["check_type_friendly_name"] = check_type_friendly_name

if check_friendly_name is None and column_id is not None:
values["check_friendly_name"] = generate_check_friendly_name(
check=check, column_id=column_id
)

return values
return check_type_friendly_name

@staticmethod
def load_yaml(
Expand Down
2 changes: 1 addition & 1 deletion focus_validator/data_loaders/csv_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ def __init__(self, data_filename):
self.data_filename = data_filename

def load(self):
return pd.read_csv(self.data_filename, keep_default_na=False)
return pd.read_csv(self.data_filename)
5 changes: 2 additions & 3 deletions focus_validator/main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import argparse
import os
import sys

from focus_validator.validator import Validator
from focus_validator.validator import DEFAULT_VERSION_SETS_PATH, Validator


def main():
Expand Down Expand Up @@ -37,7 +36,7 @@ def main():
)
parser.add_argument(
"--rule-set-path",
default=os.path.join("focus_validator", "rules", "version_sets"),
default=DEFAULT_VERSION_SETS_PATH,
help="Path to rules definitions",
)
parser.add_argument(
Expand Down
7 changes: 3 additions & 4 deletions focus_validator/outputter/outputter_unittest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import re
import sys
import xml.etree.cElementTree as ET
from datetime import datetime, timezone
Expand Down Expand Up @@ -146,9 +145,9 @@ def write(self, result_set):

# Add the testcases to the testsuites
added_testsuites = {}
for testcase in [
r for r in rows if re.match(r"^FV-[D,M][0-9]{3}-[0-9]{4}$", r["check_name"])
]:
for testcase in rows:
if testcase["status"].value == "errored":
continue
test_suite_id = testcase["check_name"].rsplit("-", 1)[0]
if test_suite_id not in added_testsuites:
formatter.add_testsuite(
Expand Down
7 changes: 2 additions & 5 deletions focus_validator/rules/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,8 @@ def is_camel_case(column_name):


@extensions.register_check_method()
def check_not_null(pandas_obj: pd.Series, allow_nulls: bool):
# TODO: works for string type, need to verify for other data types
check_values = pandas_obj.isnull() | (pandas_obj == "")
if not allow_nulls:
check_values = check_values | (pandas_obj == "NULL")
def check_not_null(pandas_obj: pd.Series):
check_values = pandas_obj.isnull()
return ~check_values


Expand Down
11 changes: 3 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tool.poetry]
name = "focus-spec-validator"
name = "focus_validator"
version = "0.5.2-dev2"
description = "FOCUS spec validator."
authors = []
Expand All @@ -19,7 +19,7 @@ generate-setup-file = false
script = "build.py"

[tool.poetry.dependencies]
python = "^3.8.3"
python = "^3.9"
pandas = "^2"
tabulate = "*"
pyarrow = "*"
Expand All @@ -28,12 +28,7 @@ pyyaml = "*"
requests = "*"
pandera = { version = "^0.17.2" }
sqlglot = "^18.7.0"

# for Python 3.12, force higher version of numpy
numpy = [
{ version = "~1.24", python = "~3.8" },
{ version = "~1.26", python = "~3.12" }
]
numpy = { version = "^1.26"}
pytz = "^2023.3.post1"
pandasql = "^0.7.3"

Expand Down
42 changes: 10 additions & 32 deletions tests/checks/test_null_value_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_null_value_not_allowed_invalid_case(self):
allow_nulls=False, data_type=DataTypes.STRING
)
sample_data = pd.DataFrame(
[{"test_dimension": "NULL"}, {"test_dimension": "val2"}]
[{"test_dimension": None}, {"test_dimension": "val2"}]
)
schema, checklist = FocusToPanderaSchemaConverter.generate_pandera_schema(
rules=rules, override_config=None
Expand All @@ -104,12 +104,14 @@ def test_null_value_not_allowed_invalid_case(self):
"Column": "test_dimension",
"Check Name": "allow_null",
"Description": " test_dimension does not allow null values.",
"Values": "NULL",
"Values": None,
"Row #": 1,
},
)

def test_null_value_allowed_invalid_case_with_empty_strings(self):
def test_null_value_allowed_valid_case_with_empty_strings(self):
# ensure that check does not treat empty strings as null values

rules = self.__generate_sample_rule_type_string__(
allow_nulls=True, data_type=DataTypes.STRING
)
Expand All @@ -123,23 +125,11 @@ def test_null_value_allowed_invalid_case_with_empty_strings(self):
)
self.assertEqual(
validation_result.checklist["allow_null"].status,
ChecklistObjectStatus.FAILED,
)
self.assertIsNotNone(validation_result.failure_cases)
failure_cases_dict = validation_result.failure_cases.to_dict(orient="records")
self.assertEqual(len(failure_cases_dict), 1)
self.assertEqual(
failure_cases_dict[0],
{
"Column": "test_dimension",
"Check Name": "allow_null",
"Description": " test_dimension allows null values.",
"Values": "",
"Row #": 2,
},
ChecklistObjectStatus.PASSED,
)
self.assertIsNone(validation_result.failure_cases)

def test_null_value_allowed_invalid_case_with_nan_values(self):
def test_null_value_allowed_case_with_explicit_null_values(self):
rules = self.__generate_sample_rule_type_string__(
allow_nulls=True, data_type=DataTypes.STRING
)
Expand All @@ -155,18 +145,6 @@ def test_null_value_allowed_invalid_case_with_nan_values(self):
)
self.assertEqual(
validation_result.checklist["allow_null"].status,
ChecklistObjectStatus.FAILED,
)
self.assertIsNotNone(validation_result.failure_cases)
failure_cases_dict = validation_result.failure_cases.to_dict(orient="records")
self.assertEqual(len(failure_cases_dict), 1)
self.assertEqual(
failure_cases_dict[0],
{
"Column": "test_dimension",
"Check Name": "allow_null",
"Description": " test_dimension allows null values.",
"Values": None,
"Row #": 2,
},
ChecklistObjectStatus.PASSED,
)
self.assertIsNone(validation_result.failure_cases)
Loading

0 comments on commit d211632

Please sign in to comment.