diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000000..de138341fc
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,7 @@
+{
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnSave": true
+    },
+    "isort.args": ["--profile", "black"]
+}
diff --git a/giskard/scanner/robustness/__init__.py b/giskard/scanner/robustness/__init__.py
index 10f1c24803..e9b52f775f 100644
--- a/giskard/scanner/robustness/__init__.py
+++ b/giskard/scanner/robustness/__init__.py
@@ -10,6 +10,13 @@
 """
-from .base_detector import BaseTextPerturbationDetector
+from .base_detector import BaseCategorialPertubationDetector, BaseTextPerturbationDetector
 from .ethical_bias_detector import EthicalBiasDetector
+from .switch_detector import SwitchAllDetector
 from .text_perturbation_detector import TextPerturbationDetector
 
-__all__ = ["EthicalBiasDetector", "TextPerturbationDetector", "BaseTextPerturbationDetector"]
+__all__ = [
+    "EthicalBiasDetector",
+    "TextPerturbationDetector",
+    "BaseTextPerturbationDetector",
+    "BaseCategorialPertubationDetector",
+    "SwitchAllDetector",
+]
diff --git a/giskard/scanner/robustness/base_detector.py b/giskard/scanner/robustness/base_detector.py
index 5362a7e78c..0250382aeb 100644
--- a/giskard/scanner/robustness/base_detector.py
+++ b/giskard/scanner/robustness/base_detector.py
@@ -1,4 +1,4 @@
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Union
 
 from abc import abstractmethod
 
@@ -11,6 +11,7 @@
 from ..issues import Issue, IssueLevel, Robustness
 from ..logger import logger
 from ..registry import Detector
+from .feature_transformation import CategorialTransformation
 from .text_transformations import TextTransformation
 
 
@@ -217,6 +218,200 @@ def _detect_issues(
         return issues
 
 
+class BaseCategorialPertubationDetector(Detector):
+    """Base class for metamorphic detectors based on categorical feature perturbations."""
+
+    _issue_group = Robustness
+    # @TODO: research the appropriate value for the taxonomy.
+    _taxonomy = None
+
+    def __init__(
+        self,
+        transformations: Optional[Sequence[CategorialTransformation]] = None,
+        threshold: Optional[float] = None,
+        output_sensitivity: Optional[float] = None,
+        num_samples: Optional[int] = None,
+    ):
+        """Create a new instance of the detector.
+
+        Parameters
+        ----------
+        transformations: Optional[Sequence[CategorialTransformation]]
+            The categorical transformations used in the metamorphic test. If not provided, a default set of
+            transformations will be used.
+        threshold: Optional[float]
+            The threshold for the fail rate, which is defined as the proportion of samples for which the model
+            prediction has changed. If the fail rate is greater than the threshold, an issue is created.
+            If not provided, a default threshold will be used.
+        output_sensitivity: Optional[float]
+            For regression models, the output sensitivity is the maximum relative change in the prediction that is
+            considered acceptable. If the relative change is greater than the output sensitivity, an issue is created.
+            This parameter is ignored for classification models. If not provided, a default output sensitivity will be
+            used.
+        num_samples: Optional[int]
+            The maximum number of samples to use for the metamorphic testing. If not provided, a default number of
+            samples will be used.
+ """ + self.transformations = transformations + self.threshold = threshold + self.num_samples = num_samples + self.output_sensitivity = output_sensitivity + + def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]) -> Sequence[Issue]: + transformations = self.transformations or self._get_default_transformations(model, dataset) + # Only analyze categorials features + cat_features = [f for f in features if dataset.column_types[f] == "category"] + logger.info( + f"{self.__class__.__name__}: Running with transformations={[t.name for t in transformations]} " + f"threshold={self.threshold} output_sensitivity={self.output_sensitivity} num_samples={self.num_samples}" + ) + + issues = [] + for transformation in transformations: + issues.extend(self._detect_issues(model, dataset, transformation, cat_features)) + + return [i for i in issues if i] + + @abstractmethod + def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Sequence[CategorialTransformation]: + ... + + def _detect_issues( + self, + model: BaseModel, + dataset: Dataset, + transformation: CategorialTransformation, + features: Sequence[Union[str, int]], + ) -> Sequence[Issue]: + num_samples = self.num_samples if self.num_samples is not None else _get_default_num_samples(model) + output_sensitivity = ( + self.output_sensitivity if self.output_sensitivity is not None else _get_default_output_sensitivity(model) + ) + threshold = self.threshold if self.threshold is not None else _get_default_threshold(model) + + issues = [] + # @TODO: integrate this with Giskard metamorphic tests already present + for feature in features: + transformation_fn = transformation(column=feature) + transformed = dataset.transform(transformation_fn) + + changed_idx = dataset.df.index[transformed.df[feature] != dataset.df[feature]] + + if changed_idx.empty: + continue + + # Select a random subset of the changed records + if len(changed_idx) > num_samples: + rng = np.random.default_rng(747) + changed_idx = changed_idx[rng.choice(len(changed_idx), num_samples, replace=False)] + + original_data = Dataset( + dataset.df.loc[changed_idx], + target=dataset.target, + column_types=dataset.column_types, + validation=False, + ) + perturbed_data = Dataset( + transformed.df.loc[changed_idx], + target=dataset.target, + column_types=dataset.column_types, + validation=False, + ) + + # Calculate predictions + original_pred = model.predict(original_data) + perturbed_pred = model.predict(perturbed_data) + + if model.is_classification: + passed = original_pred.raw_prediction == perturbed_pred.raw_prediction + elif model.is_regression: + rel_delta = _relative_delta(perturbed_pred.raw_prediction, original_pred.raw_prediction) + passed = np.abs(rel_delta) < output_sensitivity + elif model.is_text_generation: + try: + import evaluate + except ImportError as err: + raise LLMImportError() from err + + scorer = evaluate.load("bertscore") + score = scorer.compute( + predictions=perturbed_pred.prediction, + references=original_pred.prediction, + model_type="distilbert-base-multilingual-cased", + idf=True, + ) + passed = np.array(score["f1"]) > 1 - output_sensitivity + else: + raise NotImplementedError("Only classification, regression, or text generation models are supported.") + pass_rate = passed.mean() + fail_rate = 1 - pass_rate + logger.info( + f"{self.__class__.__name__}: Testing `{feature}` for perturbation `{transformation.name}`\tFail rate: {fail_rate:.3f}" + ) + + if fail_rate >= threshold: + # Severity + issue_level = IssueLevel.MAJOR if 
+
+                # Description
+                desc = (
+                    "When feature “{feature}” is perturbed with the transformation “{transformation_fn}”, "
+                    "the model changes its prediction in {fail_rate_percent}% of the cases. "
+                    "We expected the predictions not to be affected by this transformation."
+                )
+
+                failed_size = (~passed).sum()
+                slice_size = len(passed)
+
+                issue = Issue(
+                    model,
+                    dataset,
+                    group=self._issue_group,
+                    level=issue_level,
+                    transformation_fn=transformation_fn,
+                    description=desc,
+                    features=[feature],
+                    meta={
+                        "feature": feature,
+                        "domain": f"Feature `{feature}`",
+                        "deviation": f"{failed_size}/{slice_size} tested samples ({round(fail_rate * 100, 2)}%) changed prediction after perturbation",
+                        "failed_size": failed_size,
+                        "slice_size": slice_size,
+                        "fail_rate": fail_rate,
+                        "fail_rate_percent": round(fail_rate * 100, 2),
+                        "metric": "Fail rate",
+                        "metric_value": fail_rate,
+                        "threshold": threshold,
+                        "output_sensitivity": output_sensitivity,
+                        "perturbed_data_slice": perturbed_data,
+                        "perturbed_data_slice_predictions": perturbed_pred,
+                    },
+                    importance=fail_rate,
+                    tests=_generate_robustness_tests,
+                    taxonomy=self._taxonomy,
+                )
+
+                # Add examples
+                examples = original_data.df.loc[~passed, (feature,)].copy()
+                examples[f"{transformation_fn.name}({feature})"] = perturbed_data.df.loc[~passed, feature]
+
+                examples["Original prediction"] = original_pred.prediction[~passed]
+                examples["Prediction after perturbation"] = perturbed_pred.prediction[~passed]
+
+                if model.is_classification:
+                    examples["Original prediction"] = examples["Original prediction"].astype(str)
+                    examples["Prediction after perturbation"] = examples["Prediction after perturbation"].astype(str)
+                    ps_before = pd.Series(original_pred.probabilities[~passed], index=examples.index)
+                    ps_after = pd.Series(perturbed_pred.probabilities[~passed], index=examples.index)
+                    examples["Original prediction"] += ps_before.apply(lambda p: f" (p = {p:.2f})")
+                    examples["Prediction after perturbation"] += ps_after.apply(lambda p: f" (p = {p:.2f})")
+
+                issue.add_examples(examples)
+
+                issues.append(issue)
+
+        return issues
+
+
 def _generate_robustness_tests(issue: Issue):
     from ...testing.tests.metamorphic import test_metamorphic_invariance
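Illustrative sketch (not part of the patch): how the fail-rate check in `_detect_issues` above behaves for a classification model. The prediction arrays and the 0.05 threshold are made-up example values; the real default comes from `_get_default_threshold(model)`.

import numpy as np

original_pred = np.array(["cat", "dog", "dog", "cat"])   # hypothetical raw predictions
perturbed_pred = np.array(["cat", "dog", "cat", "cat"])  # hypothetical predictions after the category switch

passed = original_pred == perturbed_pred
fail_rate = 1 - passed.mean()           # 0.25: one of four samples changed prediction
threshold = 0.05                        # assumed example value
creates_issue = fail_rate >= threshold  # True -> an Issue is reported
is_major = fail_rate >= 2 * threshold   # True -> IssueLevel.MAJOR, otherwise IssueLevel.MEDIUM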
diff --git a/giskard/scanner/robustness/feature_transformation.py b/giskard/scanner/robustness/feature_transformation.py
new file mode 100644
index 0000000000..200a51a8be
--- /dev/null
+++ b/giskard/scanner/robustness/feature_transformation.py
@@ -0,0 +1,51 @@
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+
+from ...core.core import DatasetProcessFunctionMeta
+from ...registry.registry import get_object_uuid
+from ...registry.transformation_function import TransformationFunction
+
+
+class CategorialTransformation(TransformationFunction):
+    name: str
+
+    def __init__(self, cat_column, needs_dataset=False):
+        super().__init__(None, row_level=False, cell_level=False, needs_dataset=needs_dataset)
+        self.cat_column = cat_column
+        self.meta = DatasetProcessFunctionMeta(type="TRANSFORMATION")
+        self.meta.uuid = get_object_uuid(self)
+        self.meta.code = self.name
+        self.meta.name = self.name
+        self.meta.display_name = self.name
+        self.meta.tags = ["pickle", "scan"]
+        self.meta.doc = self.meta.default_doc("Automatically generated transformation function")
+
+    def __str__(self):
+        return self.name
+
+    def execute(self, data: pd.DataFrame) -> pd.DataFrame:
+        feature_data = data[self.cat_column]
+        data.loc[feature_data.index, self.cat_column] = feature_data.apply(self.make_perturbation)
+        return data
+
+    def make_perturbation(self, value) -> Optional[str]:
+        raise NotImplementedError()
+
+
+class CategorialShuffle(CategorialTransformation):
+    name = "Shuffle categorical values"
+
+    def __init__(self, cat_column, rng_seed=1729):
+        super().__init__(cat_column)
+        self.rng = np.random.default_rng(seed=rng_seed)
+
+    def execute(self, data: pd.DataFrame) -> pd.DataFrame:
+        feature_data = data[self.cat_column]
+        # Map every observed category to a randomly drawn category, so all rows sharing a value are switched together
+        cat_values = list(pd.unique(feature_data))
+        shuffled_values = self.rng.permutation(cat_values)
+        mapping = dict(zip(cat_values, shuffled_values))
+        data.loc[feature_data.index, self.cat_column] = feature_data.map(mapping)
+        return data
diff --git a/giskard/scanner/robustness/switch_detector.py b/giskard/scanner/robustness/switch_detector.py
new file mode 100644
index 0000000000..9229680207
--- /dev/null
+++ b/giskard/scanner/robustness/switch_detector.py
@@ -0,0 +1,32 @@
+from typing import Sequence
+
+from giskard.datasets.base import Dataset
+from giskard.models.base.model import BaseModel
+from giskard.scanner.robustness.feature_transformation import CategorialTransformation
+
+from ..decorators import detector
+from ..issues import Robustness
+from .base_detector import BaseCategorialPertubationDetector
+
+
+@detector(
+    name="switch_all",
+    tags=["switch_all", "robustness", "classification", "regression"],
+)
+class SwitchAllDetector(BaseCategorialPertubationDetector):
+    """Detect whether perturbing a single categorical column of the input data changes the model predictions.
+
+    By default, the values of the column are simply shuffled.
+
+    For example, given a breed feature with possible values ['Labrador', 'Husky', 'Beagle', ...], the idea is to
+    switch all 'Labrador' values to another breed, all 'Husky' values to another breed, and so on.
+    """
+
+    _issue_group = Robustness
+    # @TODO: find the appropriate taxonomy information
+    _taxonomy = None
+
+    def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Sequence[CategorialTransformation]:
+        from .feature_transformation import CategorialShuffle
+
+        return [CategorialShuffle]
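Illustrative usage sketch (not part of the patch): running the new detector directly against an already wrapped Giskard model and dataset. `giskard_model` and `giskard_dataset` are assumed to exist, the dataset's `column_types` must flag the relevant features as "category" for the detector to pick them up, and the threshold and sample count are arbitrary example values.

from giskard.scanner.robustness import SwitchAllDetector

# `giskard_model` and `giskard_dataset` are assumed, pre-existing giskard Model / Dataset objects.
detector = SwitchAllDetector(threshold=0.05, num_samples=500)
issues = detector.run(giskard_model, giskard_dataset, features=list(giskard_dataset.column_types))

for issue in issues:
    # e.g. "Feature `breed` 12/500 tested samples (2.4%) changed prediction after perturbation"
    print(issue.meta["domain"], issue.meta["deviation"])

Because the class is registered through the `@detector` decorator, the standard scan entry point should also pick it up alongside the other robustness detectors.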