Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issues/fix import path for default version sets, output of unittest outputter and null checks, drops support for Python 3.8. #107

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/coverage.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
name: Coverage

on:
push:
branches:
Expand All @@ -10,14 +9,15 @@ on:
branches:
- main
- dev

jobs:
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
name: Lint

on:
push:
pull_request:
branches:
- main
- dev

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ on:
- main
- dev
- issue/**

jobs:
validate_focus:
runs-on: ubuntu-latest
Expand All @@ -14,8 +13,10 @@ jobs:
steps:
- name: Check out repository code
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@ on:
tags:
- 'v\d\.\d\.\d'
- 'v\d\.\d\.\d-(dev|rc)\d'

jobs:
publish:
permissions:
id-token: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: 3.11
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Install dependencies
run: |
find -type l -exec bash -c 'ln -f "$(readlink -m "$0")" "$0"' {} \;
poetry build
poetry build --format=sdist
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
5 changes: 1 addition & 4 deletions .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
name: Unittest

on:
push:
branches:
Expand All @@ -10,14 +9,12 @@ on:
branches:
- main
- dev

jobs:
test:
runs-on: ${{ matrix.os }}

strategy:
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
os: [ windows-latest, ubuntu-latest, macos-latest ]
steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ tbd

### Prerequisites

- Python 3.8+
- Python 3.9+
- Poetry (Package & Dependency Manager)

### Installation
Expand Down
5 changes: 5 additions & 0 deletions focus_validator/config_objects/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,8 @@ def generate_check_friendly_name(check, column_id):
return f"{column_id} does not allow null values."
elif isinstance(check, DataTypeCheck):
return f"{column_id} requires values of type {check.data_type.value}."
elif isinstance(check, SQLQueryCheck):
sql_query = " ".join([word.strip() for word in check.sql_query.split()])
return f"{column_id} requires values that return true when evaluated by the following SQL query: {sql_query}"
else:
raise NotImplementedError(f"Check {check} not implemented.")
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __generate_pandera_check__(rule: Rule, check_id):
)
elif isinstance(check, AllowNullsCheck):
return pa.Check.check_not_null(
error=error_string, ignore_na=False, allow_nulls=check.allow_nulls
error=error_string, ignore_na=check.allow_nulls
)
else:
raise FocusNotImplementedError(
Expand Down
58 changes: 31 additions & 27 deletions focus_validator/config_objects/rule.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os
from typing import Optional, Union
from typing import Annotated, Optional, Union

import yaml
from pydantic import BaseModel, ConfigDict, model_validator
from pydantic import BaseModel, ConfigDict, Field, field_validator
from pydantic_core.core_schema import ValidationInfo

from focus_validator.config_objects.common import (
SIMPLE_CHECKS,
Expand Down Expand Up @@ -33,44 +34,47 @@ class Rule(BaseModel):
SIMPLE_CHECKS, AllowNullsCheck, ValueInCheck, DataTypeCheck, SQLQueryCheck
]

check_friendly_name: Optional[
str
check_friendly_name: Annotated[
Optional[str], Field(validate_default=True)
] = None # auto generated or else can be overwritten
check_type_friendly_name: Optional[str] = None
check_type_friendly_name: Annotated[
Optional[str], Field(validate_default=True)
] = None

model_config = ConfigDict(
extra="forbid", # prevents config from containing any undesirable keys
frozen=True, # prevents any modification to any attribute once loaded from config
)

# @root_validator
@model_validator(mode="before")
@classmethod
def root_val(cls, values):
"""
Root validator that checks for all options passed in the config and generate missing options.
"""
if values is None:
values = {}

check = values.get("check")
check_friendly_name = values.get("check_friendly_name")
column_id = values.get("column_id")
if check is not None:
@field_validator("check_friendly_name")
def validate_or_generate_check_friendly_name(
cls, check_friendly_name, validation_info: ValidationInfo
):
values = validation_info.data
if (
check_friendly_name is None
and values.get("check") is not None
and values.get("column_id") is not None
):
check_friendly_name = generate_check_friendly_name(
check=values["check"], column_id=values["column_id"]
)
return check_friendly_name

@field_validator("check_type_friendly_name")
def validate_or_generate_check_type_friendly_name(
cls, check_type_friendly_name, validation_info: ValidationInfo
):
values = validation_info.data
if values.get("check") is not None and values.get("column_id") is not None:
check = values.get("check")
if isinstance(check, str):
check_type_friendly_name = "".join(
[word.title() for word in check.split("_")]
)
else:
check_type_friendly_name = check.__class__.__name__
values["check_type_friendly_name"] = check_type_friendly_name

if check_friendly_name is None and column_id is not None:
values["check_friendly_name"] = generate_check_friendly_name(
check=check, column_id=column_id
)

return values
return check_type_friendly_name

@staticmethod
def load_yaml(
Expand Down
2 changes: 1 addition & 1 deletion focus_validator/data_loaders/csv_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ def __init__(self, data_filename):
self.data_filename = data_filename

def load(self):
return pd.read_csv(self.data_filename, keep_default_na=False)
return pd.read_csv(self.data_filename)
5 changes: 2 additions & 3 deletions focus_validator/main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import argparse
import os
import sys

from focus_validator.validator import Validator
from focus_validator.validator import DEFAULT_VERSION_SETS_PATH, Validator


def main():
Expand Down Expand Up @@ -37,7 +36,7 @@ def main():
)
parser.add_argument(
"--rule-set-path",
default=os.path.join("focus_validator", "rules", "version_sets"),
default=DEFAULT_VERSION_SETS_PATH,
help="Path to rules definitions",
)
parser.add_argument(
Expand Down
7 changes: 3 additions & 4 deletions focus_validator/outputter/outputter_unittest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import re
import sys
import xml.etree.cElementTree as ET
from datetime import datetime, timezone
Expand Down Expand Up @@ -146,9 +145,9 @@ def write(self, result_set):

# Add the testcases to the testsuites
added_testsuites = {}
for testcase in [
r for r in rows if re.match(r"^FV-[D,M][0-9]{3}-[0-9]{4}$", r["check_name"])
]:
for testcase in rows:
if testcase["status"].value == "errored":
continue
test_suite_id = testcase["check_name"].rsplit("-", 1)[0]
if test_suite_id not in added_testsuites:
formatter.add_testsuite(
Expand Down
7 changes: 2 additions & 5 deletions focus_validator/rules/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,8 @@ def is_camel_case(column_name):


@extensions.register_check_method()
def check_not_null(pandas_obj: pd.Series, allow_nulls: bool):
# TODO: works for string type, need to verify for other data types
check_values = pandas_obj.isnull() | (pandas_obj == "")
if not allow_nulls:
check_values = check_values | (pandas_obj == "NULL")
def check_not_null(pandas_obj: pd.Series):
check_values = pandas_obj.isnull()
return ~check_values


Expand Down
11 changes: 3 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tool.poetry]
name = "focus-spec-validator"
name = "focus_validator"
version = "0.5.2-dev2"
description = "FOCUS spec validator."
authors = []
Expand All @@ -19,7 +19,7 @@ generate-setup-file = false
script = "build.py"

[tool.poetry.dependencies]
python = "^3.8.3"
python = "^3.9"
pandas = "^2"
tabulate = "*"
pyarrow = "*"
Expand All @@ -28,12 +28,7 @@ pyyaml = "*"
requests = "*"
pandera = { version = "^0.17.2" }
sqlglot = "^18.7.0"

# for Python 3.12, force higher version of numpy
numpy = [
{ version = "~1.24", python = "~3.8" },
{ version = "~1.26", python = "~3.12" }
]
numpy = { version = "^1.26"}
pytz = "^2023.3.post1"
pandasql = "^0.7.3"

Expand Down
42 changes: 10 additions & 32 deletions tests/checks/test_null_value_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_null_value_not_allowed_invalid_case(self):
allow_nulls=False, data_type=DataTypes.STRING
)
sample_data = pd.DataFrame(
[{"test_dimension": "NULL"}, {"test_dimension": "val2"}]
[{"test_dimension": None}, {"test_dimension": "val2"}]
)
schema, checklist = FocusToPanderaSchemaConverter.generate_pandera_schema(
rules=rules, override_config=None
Expand All @@ -104,12 +104,14 @@ def test_null_value_not_allowed_invalid_case(self):
"Column": "test_dimension",
"Check Name": "allow_null",
"Description": " test_dimension does not allow null values.",
"Values": "NULL",
"Values": None,
"Row #": 1,
},
)

def test_null_value_allowed_invalid_case_with_empty_strings(self):
def test_null_value_allowed_valid_case_with_empty_strings(self):
# ensure that check does not treat empty strings as null values

rules = self.__generate_sample_rule_type_string__(
allow_nulls=True, data_type=DataTypes.STRING
)
Expand All @@ -123,23 +125,11 @@ def test_null_value_allowed_invalid_case_with_empty_strings(self):
)
self.assertEqual(
validation_result.checklist["allow_null"].status,
ChecklistObjectStatus.FAILED,
)
self.assertIsNotNone(validation_result.failure_cases)
failure_cases_dict = validation_result.failure_cases.to_dict(orient="records")
self.assertEqual(len(failure_cases_dict), 1)
self.assertEqual(
failure_cases_dict[0],
{
"Column": "test_dimension",
"Check Name": "allow_null",
"Description": " test_dimension allows null values.",
"Values": "",
"Row #": 2,
},
ChecklistObjectStatus.PASSED,
)
self.assertIsNone(validation_result.failure_cases)

def test_null_value_allowed_invalid_case_with_nan_values(self):
def test_null_value_allowed_case_with_explicit_null_values(self):
rules = self.__generate_sample_rule_type_string__(
allow_nulls=True, data_type=DataTypes.STRING
)
Expand All @@ -155,18 +145,6 @@ def test_null_value_allowed_invalid_case_with_nan_values(self):
)
self.assertEqual(
validation_result.checklist["allow_null"].status,
ChecklistObjectStatus.FAILED,
)
self.assertIsNotNone(validation_result.failure_cases)
failure_cases_dict = validation_result.failure_cases.to_dict(orient="records")
self.assertEqual(len(failure_cases_dict), 1)
self.assertEqual(
failure_cases_dict[0],
{
"Column": "test_dimension",
"Check Name": "allow_null",
"Description": " test_dimension allows null values.",
"Values": None,
"Row #": 2,
},
ChecklistObjectStatus.PASSED,
)
self.assertIsNone(validation_result.failure_cases)
Loading