Skip to content

Commit

Permalink
Merge pull request #1458 from Sage-Bionetworks/develop-fix-ge
Browse files Browse the repository at this point in the history
fix: check existing great expectation suite before adding a new one
  • Loading branch information
linglp authored Aug 1, 2024
2 parents 62f3692 + 89c114e commit 6db0860
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 20 deletions.
62 changes: 42 additions & 20 deletions schematic/models/GE_Helpers.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
from statistics import mode
from tabnanny import check
import logging
import os
import re
import numpy as np
from statistics import mode
from tabnanny import check

# allows specifying explicit variable types
from typing import Any, Dict, Optional, Text, List
from urllib.parse import urlparse
from urllib.request import urlopen, OpenerDirector, HTTPDefaultErrorHandler
from urllib.request import Request
from typing import Any, Dict, List, Optional, Text
from urllib import error
from attr import attr

from ruamel import yaml
from urllib.parse import urlparse
from urllib.request import HTTPDefaultErrorHandler, OpenerDirector, Request, urlopen

import great_expectations as ge
import numpy as np
from attr import attr
from great_expectations.core import ExpectationSuite
from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.data_context import BaseDataContext
from great_expectations.data_context.types.base import (
Expand All @@ -27,18 +24,17 @@
ExpectationSuiteIdentifier,
)
from great_expectations.exceptions.exceptions import GreatExpectationsError
from ruamel import yaml


import great_expectations as ge
from schematic.models.validate_attribute import GenerateError
from schematic.schemas.data_model_graph import DataModelGraphExplorer

from schematic.utils.schema_utils import extract_component_validation_rules

from schematic.utils.validate_utils import (
rule_in_rule_list,
np_array_to_str_list,
iterable_to_str_list,
np_array_to_str_list,
required_is_only_rule,
rule_in_rule_list,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -147,6 +143,35 @@ def build_context(self):
# self.context.test_yaml_config(yaml.dump(datasource_config))
self.context.add_datasource(**datasource_config)

def add_expectation_suite_if_not_exists(self) -> ExpectationSuite:
    """
    Purpose:
        Register a fresh "Manifest_test_suite" expectation suite on the
        data context. If the context already lists a suite under that
        name it is deleted first, and every checkpoint the context lists
        is deleted as well, before the suite is added again.
    Input:
    Returns:
        The suite returned by the context. The suite and its name are
        also saved to self.suite and self.expectation_suite_name.
    """
    suite_name = "Manifest_test_suite"
    self.expectation_suite_name = suite_name
    ctx = self.context

    # Look up what the context currently holds before changing anything
    existing_suites = ctx.list_expectation_suite_names()
    existing_checkpoints = ctx.list_checkpoints()

    # A suite already stored under the same name is removed so it can be re-added
    if suite_name in existing_suites:
        ctx.delete_expectation_suite(suite_name)

    # Every listed checkpoint is removed; a falsy listing means nothing to delete
    for stale_checkpoint in existing_checkpoints or ():
        ctx.delete_checkpoint(stale_checkpoint)

    new_suite = ctx.add_expectation_suite(expectation_suite_name=suite_name)
    self.suite = new_suite
    return new_suite

def build_expectation_suite(
self,
):
Expand All @@ -162,10 +187,7 @@ def build_expectation_suite(
"""

# create blank expectation suite
self.expectation_suite_name = "Manifest_test_suite"
self.suite = self.context.add_expectation_suite(
expectation_suite_name=self.expectation_suite_name,
)
self.suite = self.add_expectation_suite_if_not_exists()

# build expectation configurations for each expectation
for col in self.manifest.columns:
Expand Down
72 changes: 72 additions & 0 deletions tests/test_ge_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from typing import Generator
from unittest.mock import MagicMock

import pandas as pd
import pytest

from schematic.models.GE_Helpers import GreatExpectationsHelpers
from tests.conftest import Helpers


@pytest.fixture(scope="class")
def mock_ge_helpers(
    helpers: Helpers,
) -> Generator[GreatExpectationsHelpers, None, None]:
    """Yield a GreatExpectationsHelpers built from example.model.jsonld and
    mock_manifests/Valid_Test_Manifest.csv, shared by all tests in a class"""
    graph_explorer = helpers.get_data_model_graph_explorer(path="example.model.jsonld")

    # the same resolved path is used both to load the data frame and as manifestPath
    manifest_path = helpers.get_data_path("mock_manifests/Valid_Test_Manifest.csv")
    manifest_frame = helpers.get_data_frame(manifest_path)

    yield GreatExpectationsHelpers(
        dmge=graph_explorer,
        unimplemented_expectations=["url"],
        manifest=manifest_frame,
        manifestPath=manifest_path,
    )


class TestGreatExpectationsHelpers:
    """Unit tests for GreatExpectationsHelpers.add_expectation_suite_if_not_exists"""

    def test_add_expectation_suite_if_not_exists_does_not_exist(
        self, mock_ge_helpers: GreatExpectationsHelpers
    ) -> None:
        """test add_expectation_suite_if_not_exists method when the expectation suite does not exist"""
        # The fixture is class-scoped, so each test installs its own mock context
        # instead of relying on whatever a previous test left behind
        mock_ge_helpers.context = MagicMock()
        mock_ge_helpers.context.list_expectation_suite_names.return_value = []
        # configure the checkpoint listing explicitly rather than leaving it
        # as an unconfigured mock
        mock_ge_helpers.context.list_checkpoints.return_value = []

        # Call the method
        result = mock_ge_helpers.add_expectation_suite_if_not_exists()

        # Nothing exists yet, so nothing may be deleted
        mock_ge_helpers.context.list_expectation_suite_names.assert_called_once()
        mock_ge_helpers.context.delete_expectation_suite.assert_not_called()
        mock_ge_helpers.context.delete_checkpoint.assert_not_called()

        # The suite is created exactly once and handed back to the caller
        mock_ge_helpers.context.add_expectation_suite.assert_called_once_with(
            expectation_suite_name="Manifest_test_suite"
        )
        assert result is mock_ge_helpers.context.add_expectation_suite.return_value
        assert mock_ge_helpers.suite is result

    def test_add_expectation_suite_if_not_exists_does_exist(
        self, mock_ge_helpers: GreatExpectationsHelpers
    ) -> None:
        """test add_expectation_suite_if_not_exists method when the expectation suite does exist"""
        # mock context provided by ge_helpers
        mock_ge_helpers.context = MagicMock()
        mock_ge_helpers.context.list_expectation_suite_names.return_value = [
            "Manifest_test_suite"
        ]
        mock_ge_helpers.context.list_checkpoints.return_value = ["test_checkpoint"]

        # Call the method
        result = mock_ge_helpers.add_expectation_suite_if_not_exists()

        # The existing suite and the listed checkpoint are deleted
        mock_ge_helpers.context.list_expectation_suite_names.assert_called_once()
        mock_ge_helpers.context.delete_expectation_suite.assert_called_once_with(
            "Manifest_test_suite"
        )
        mock_ge_helpers.context.delete_checkpoint.assert_called_once_with(
            "test_checkpoint"
        )

        # The suite is then added again and handed back to the caller
        mock_ge_helpers.context.add_expectation_suite.assert_called_once_with(
            expectation_suite_name="Manifest_test_suite"
        )
        assert result is mock_ge_helpers.context.add_expectation_suite.return_value
        assert mock_ge_helpers.suite is result

0 comments on commit 6db0860

Please sign in to comment.