From 60fc2efd93b52032b694934bb9d992c4b6c4391c Mon Sep 17 00:00:00 2001 From: ChatBear Date: Wed, 17 Apr 2024 07:14:06 +0200 Subject: [PATCH 1/3] created the differents files for categorial pertubation --- .../robustness/categorial_pertubation.py | 51 +++++++++++++++++++ .../robustness/feature_transformation.py | 5 ++ 2 files changed, 56 insertions(+) create mode 100644 giskard/scanner/robustness/categorial_pertubation.py create mode 100644 giskard/scanner/robustness/feature_transformation.py diff --git a/giskard/scanner/robustness/categorial_pertubation.py b/giskard/scanner/robustness/categorial_pertubation.py new file mode 100644 index 0000000000..4739dcf3ca --- /dev/null +++ b/giskard/scanner/robustness/categorial_pertubation.py @@ -0,0 +1,51 @@ +from typing import Sequence, Optional + +from adc import abstractmethod + +from ..datasets.base import Dataset +from +from ..issues import Robustness +from ..registry import Detector +from .feature_transformation import CategorialTransformation + + +class BaseCategorialPertubationDetector(Detector): + """Base class for metamorphic detectors based on categorial feature""" + _issue_group = Robustness + # @TODO : Reserch for the adapted value for the taxonomy. + _taxonomy = None + + def __init__( + self, + transformations: Optional[Sequence[CategorialTransformation]] = None, + threshold: Optional[float] = None, + output_sensitivity: Optional[float] = None, + num_samples: Optional[int] = None, + ): + """ + Create a new instance of the detector + # @TODO : Reread the docstring in order to make open source ready + Parameters + ---------- + transformations: Optional[Sequence[CategorialTransformation]] + The categorial transformation used in the metamorphic test. If not provided, a default set of transformation will be used. + threshold: Optional[float] + The threshold for the fail rate, which is defined as the proportion of samples for which the model + prediction has changed. If the fail rate is greater than the threshold, an issue is created. + If not provided, a default threshold will be used. + output_sensitivity: Optional[float] + For regression models, the output sensitivity is the maximum relative change in the prediction that is + considered acceptable. If the relative change is greater than the output sensitivity, an issue is created. + This parameter is ignored for classification models. If not provided, a default output sensitivity will be + used. + num_samples: Optional[int] + The maximum number of samples to use for the metamorphic testing. If not provided, a default number of + samples will be used. + """ + self.transformations = transformations + self.threshold = threshold + self.num_samples = num_samples + self.output_sensitivity = output_sensitivity + + def run() + ... \ No newline at end of file diff --git a/giskard/scanner/robustness/feature_transformation.py b/giskard/scanner/robustness/feature_transformation.py new file mode 100644 index 0000000000..47d8c93834 --- /dev/null +++ b/giskard/scanner/robustness/feature_transformation.py @@ -0,0 +1,5 @@ +from ..registry.transformation_function import TransformationFunction + +class CategorialTransformation(TransformationFunction): + name: str + ... \ No newline at end of file From c3a9588b4c18850872dd1774c4a23fce7555f0c8 Mon Sep 17 00:00:00 2001 From: ChatBear Date: Sun, 21 Apr 2024 22:56:25 +0200 Subject: [PATCH 2/3] create switch detector + shuffle transformation --- giskard/scanner/robustness/__init__.py | 5 +- giskard/scanner/robustness/base_detector.py | 201 +++++++++++++++++- .../robustness/categorial_pertubation.py | 51 ----- .../robustness/feature_transformation.py | 50 ++++- giskard/scanner/robustness/switch_detector.py | 33 +++ 5 files changed, 284 insertions(+), 56 deletions(-) delete mode 100644 giskard/scanner/robustness/categorial_pertubation.py create mode 100644 giskard/scanner/robustness/switch_detector.py diff --git a/giskard/scanner/robustness/__init__.py b/giskard/scanner/robustness/__init__.py index 10f1c24803..1a890fd5f7 100644 --- a/giskard/scanner/robustness/__init__.py +++ b/giskard/scanner/robustness/__init__.py @@ -8,8 +8,9 @@ to the input data that is not supposed to affect the output significantly, and compare the output of the model before and after the transformation. """ -from .base_detector import BaseTextPerturbationDetector +from .base_detector import BaseTextPerturbationDetector, BaseCategorialPertubationDetector from .ethical_bias_detector import EthicalBiasDetector from .text_perturbation_detector import TextPerturbationDetector +from .switch_detector import SwitchAllDetector -__all__ = ["EthicalBiasDetector", "TextPerturbationDetector", "BaseTextPerturbationDetector"] +__all__ = ["EthicalBiasDetector", "TextPerturbationDetector", "BaseTextPerturbationDetector", "BaseCategorialTransformation", "SwitchAllDetector"] diff --git a/giskard/scanner/robustness/base_detector.py b/giskard/scanner/robustness/base_detector.py index 91e2ddd42b..507ca2e375 100644 --- a/giskard/scanner/robustness/base_detector.py +++ b/giskard/scanner/robustness/base_detector.py @@ -1,4 +1,4 @@ -from typing import Optional, Sequence +from typing import Optional, Sequence, Union from abc import abstractmethod @@ -12,7 +12,7 @@ from ..logger import logger from ..registry import Detector from .text_transformations import TextTransformation - +from .feature_transformation import CategorialTransformation class BaseTextPerturbationDetector(Detector): """Base class for metamorphic detectors based on text transformations.""" @@ -216,6 +216,203 @@ def _detect_issues( return issues +class BaseCategorialPertubationDetector(Detector): + """Base class for metamorphic detectors based on categorial feature""" + _issue_group = Robustness + # @TODO : Reserch for the adapted value for the taxonomy. + _taxonomy = None + + def __init__( + self, + transformations: Optional[Sequence[CategorialTransformation]] = None, + threshold: Optional[float] = None, + output_sensitivity: Optional[float] = None, + num_samples: Optional[int] = None, + ): + """ + Create a new instance of the detector + Parameters + ---------- + transformations: Optional[Sequence[CategorialTransformation]] + The categorial transformation used in the metamorphic test. If not provided, a default set of transformation will be used. + threshold: Optional[float] + The threshold for the fail rate, which is defined as the proportion of samples for which the model + prediction has changed. If the fail rate is greater than the threshold, an issue is created. + If not provided, a default threshold will be used. + output_sensitivity: Optional[float] + For regression models, the output sensitivity is the maximum relative change in the prediction that is + considered acceptable. If the relative change is greater than the output sensitivity, an issue is created. + This parameter is ignored for classification models. If not provided, a default output sensitivity will be + used. + num_samples: Optional[int] + The maximum number of samples to use for the metamorphic testing. If not provided, a default number of + samples will be used. + """ + self.transformations = transformations + self.threshold = threshold + self.num_samples = num_samples + self.output_sensitivity = output_sensitivity + + def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]) -> Sequence[Issue]: + transformations = self.transformations or self._get_default_transformations(model, dataset) + # Only analyze categorials features + cat_features = [ + f for f in features + if dataset.column_types[f] == "category" + ] + logger.info( + f"{self.__class__.__name__}: Running with transformations={[t.name for t in transformations]} " + f"threshold={self.threshold} output_sensitivity={self.output_sensitivity} num_samples={self.num_samples}" + ) + + issues = [] + for transformation in transformations: + issues.extend(self._detect_issues(model, dataset, transformation, cat_features)) + + return [i for i in issues if i] + + + @abstractmethod + def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Sequence[CategorialTransformation]: + ... + + def _detect_issues(self, + model: BaseModel, + dataset: Dataset, + transformation: CategorialTransformation, + features: Sequence[Union[str, int]] + ) -> Sequence[Issue]: + num_samples = self.num_samples if self.num_samples is not None else _get_default_num_samples(model) + output_sensitivity = ( + self.output_sensitivity if self.output_sensitivity is not None else _get_default_output_sensitivity(model) + ) + threshold = self.threshold if self.threshold is not None else _get_default_threshold(model) + + issues = [] + # @TODO: integrate this with Giskard metamorphic tests already present + for feature in features: + transformation_fn = transformation(column=feature) + transformed = dataset.transform(transformation_fn) + + changed_idx = dataset.df.index[transformed.df[feature] != dataset.df[feature]] + + if changed_idx.empty: + continue + + # Select a random subset of the changed records + if len(changed_idx) > num_samples: + rng = np.random.default_rng(747) + changed_idx = changed_idx[rng.choice(len(changed_idx), num_samples, replace=False)] + + + original_data = Dataset( + dataset.df.loc[changed_idx], + target=dataset.target, + column_types=dataset.column_types, + validation=False, + ) + perturbed_data = Dataset( + transformed.df.loc[changed_idx], + target=dataset.target, + column_types=dataset.column_types, + validation=False, + ) + + # Calculate predictions + original_pred = model.predict(original_data) + perturbed_pred = model.predict(perturbed_data) + + if model.is_classification: + passed = original_pred.raw_prediction == perturbed_pred.raw_prediction + elif model.is_regression: + rel_delta = _relative_delta(perturbed_pred.raw_prediction, original_pred.raw_prediction) + passed = np.abs(rel_delta) < output_sensitivity + elif model.is_text_generation: + try: + import evaluate + except ImportError as err: + raise LLMImportError() from err + + scorer = evaluate.load("bertscore") + score = scorer.compute( + predictions=perturbed_pred.prediction, + references=original_pred.prediction, + model_type="distilbert-base-multilingual-cased", + idf=True, + ) + passed = np.array(score["f1"]) > 1 - output_sensitivity + else: + raise NotImplementedError("Only classification, regression, or text generation models are supported.") + pass_rate = passed.mean() + fail_rate = 1 - pass_rate + logger.info( + f"{self.__class__.__name__}: Testing `{feature}` for perturbation `{transformation.name}`\tFail rate: {fail_rate:.3f}" + ) + + if fail_rate >= threshold: + # Severity + issue_level = IssueLevel.MAJOR if fail_rate >= 2 * threshold else IssueLevel.MEDIUM + + # Description + desc = ( + "When feature “{feature}” is perturbed with the transformation “{transformation_fn}”, " + "the model changes its prediction in {fail_rate_percent}% of the cases. " + "We expected the predictions not to be affected by this transformation." + ) + + failed_size = (~passed).sum() + slice_size = len(passed) + + issue = Issue( + model, + dataset, + group=self._issue_group, + level=issue_level, + transformation_fn=transformation_fn, + description=desc, + features=[feature], + meta={ + "feature": feature, + "domain": f"Feature `{feature}`", + "deviation": f"{failed_size}/{slice_size} tested samples ({round(fail_rate * 100, 2)}%) changed prediction after perturbation", + "failed_size": failed_size, + "slice_size": slice_size, + "fail_rate": fail_rate, + "fail_rate_percent": round(fail_rate * 100, 2), + "metric": "Fail rate", + "metric_value": fail_rate, + "threshold": threshold, + "output_sentitivity": output_sensitivity, + "perturbed_data_slice": perturbed_data, + "perturbed_data_slice_predictions": perturbed_pred, + }, + importance=fail_rate, + tests=_generate_robustness_tests, + taxonomy=self._taxonomy, + ) + + # Add examples + examples = original_data.df.loc[~passed, (feature,)].copy() + examples[f"{transformation_fn.name}({feature})"] = perturbed_data.df.loc[~passed, feature] + + examples["Original prediction"] = original_pred.prediction[~passed] + examples["Prediction after perturbation"] = perturbed_pred.prediction[~passed] + + if model.is_classification: + examples["Original prediction"] = examples["Original prediction"].astype(str) + examples["Prediction after perturbation"] = examples["Prediction after perturbation"].astype(str) + ps_before = pd.Series(original_pred.probabilities[~passed], index=examples.index) + ps_after = pd.Series(perturbed_pred.probabilities[~passed], index=examples.index) + examples["Original prediction"] += ps_before.apply(lambda p: f" (p = {p:.2f})") + examples["Prediction after perturbation"] += ps_after.apply(lambda p: f" (p = {p:.2f})") + + issue.add_examples(examples) + + issues.append(issue) + + return issues + + def _generate_robustness_tests(issue: Issue): from ...testing.tests.metamorphic import test_metamorphic_invariance diff --git a/giskard/scanner/robustness/categorial_pertubation.py b/giskard/scanner/robustness/categorial_pertubation.py deleted file mode 100644 index 4739dcf3ca..0000000000 --- a/giskard/scanner/robustness/categorial_pertubation.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Sequence, Optional - -from adc import abstractmethod - -from ..datasets.base import Dataset -from -from ..issues import Robustness -from ..registry import Detector -from .feature_transformation import CategorialTransformation - - -class BaseCategorialPertubationDetector(Detector): - """Base class for metamorphic detectors based on categorial feature""" - _issue_group = Robustness - # @TODO : Reserch for the adapted value for the taxonomy. - _taxonomy = None - - def __init__( - self, - transformations: Optional[Sequence[CategorialTransformation]] = None, - threshold: Optional[float] = None, - output_sensitivity: Optional[float] = None, - num_samples: Optional[int] = None, - ): - """ - Create a new instance of the detector - # @TODO : Reread the docstring in order to make open source ready - Parameters - ---------- - transformations: Optional[Sequence[CategorialTransformation]] - The categorial transformation used in the metamorphic test. If not provided, a default set of transformation will be used. - threshold: Optional[float] - The threshold for the fail rate, which is defined as the proportion of samples for which the model - prediction has changed. If the fail rate is greater than the threshold, an issue is created. - If not provided, a default threshold will be used. - output_sensitivity: Optional[float] - For regression models, the output sensitivity is the maximum relative change in the prediction that is - considered acceptable. If the relative change is greater than the output sensitivity, an issue is created. - This parameter is ignored for classification models. If not provided, a default output sensitivity will be - used. - num_samples: Optional[int] - The maximum number of samples to use for the metamorphic testing. If not provided, a default number of - samples will be used. - """ - self.transformations = transformations - self.threshold = threshold - self.num_samples = num_samples - self.output_sensitivity = output_sensitivity - - def run() - ... \ No newline at end of file diff --git a/giskard/scanner/robustness/feature_transformation.py b/giskard/scanner/robustness/feature_transformation.py index 47d8c93834..346a293aa9 100644 --- a/giskard/scanner/robustness/feature_transformation.py +++ b/giskard/scanner/robustness/feature_transformation.py @@ -1,5 +1,53 @@ +import pandas as pd +import numpy as np +from typing import Optional, List + +from ..core.core import DatasetProcessFunctionMeta +from ..registry.registry import get_object_uuid from ..registry.transformation_function import TransformationFunction + class CategorialTransformation(TransformationFunction): name: str - ... \ No newline at end of file + + def __init__(self, cat_column, needs_dataset=False): + super().__init__(None, row_level=False, cell_level=False, needs_dataset=needs_dataset) + self.cat_column = cat_column + self.meta = DatasetProcessFunctionMeta(type="TRANSFORMATION") + self.meta.uuid = get_object_uuid(self) + self.meta.code = self.name + self.meta.name = self.name + self.meta.display_name = self.name + self.meta.tags = ["pickle", "scan"] + self.meta.doc = self.meta.default_doc("Automatically generated transformation function") + + def __str__(self): + return self.name + + def execute(self, data: pd.DataFrame) -> pd.DataFrame: + feature_data = data[self.cat_column] + data.loc[feature_data.index, self.column] = feature_data.apply(self.make_perturbation) + return data + + def make_perturbation(self) -> Optional[List[str]]: + raise NotImplementedError() + + +class CategorialShuffle(CategorialTransformation): + name = "Shuffle categorial values" + + def __init__(self, cat_column, rng_seed=1729): + super.__init__(cat_column) + self.rng = np.random.default_rng(seed=rng_seed) + + def execute(self, data: pd.DataFrame): + feature_data = data[self.cat_column] + cat_values = list(set(feature_data)) + for i in range(len(cat_values)): + shuffle_cat_value = self.rng.choice(cat_values) + cat_values[i] = shuffle_cat_value + + return data + + + diff --git a/giskard/scanner/robustness/switch_detector.py b/giskard/scanner/robustness/switch_detector.py new file mode 100644 index 0000000000..e2e5a9259e --- /dev/null +++ b/giskard/scanner/robustness/switch_detector.py @@ -0,0 +1,33 @@ +from typing import Sequence + +from giskard.datasets.base import Dataset +from giskard.models.base.model import BaseModel +from giskard.scanner.robustness.feature_transformation import CategorialTransformation + +from .base_detector import BaseCategorialPertubationDetector +from ..issues import Robustness +from ..decorators import detector + +@detector( + name="swtich_all", + tags=["switch_all", "robustness", "classification", "regression"], +) +class SwitchAllDetector(BaseCategorialPertubationDetector): + """Detect if a pertubation of a single categorial column from the input data can pertub the model. + + By default, we simply perform a shuffle of the data. + + As an example is having a breed category with values potential values: ['Labrador', 'Husky', 'Beagle', ...]. + The idea is to switch all Labrador` value to any other breed and so on. + """ + + _issue_group = Robustness + # @TODO: find information related to the taxonomy + _taxonomy = None + + def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Sequence[CategorialTransformation]: + from .feature_transformation import ( + CategorialShuffle + ) + + return [CategorialShuffle] \ No newline at end of file From 6fe3730ec601a7f62a29de1b256a1efcac738598 Mon Sep 17 00:00:00 2001 From: ChatBear Date: Sat, 27 Apr 2024 22:01:48 +0900 Subject: [PATCH 3/3] black formatting --- .vscode/settings.json | 6 ++ giskard/scanner/robustness/__init__.py | 12 +++- giskard/scanner/robustness/base_detector.py | 60 +++++++++---------- .../robustness/feature_transformation.py | 38 ++++++------ giskard/scanner/robustness/switch_detector.py | 19 +++--- 5 files changed, 71 insertions(+), 64 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000..de138341fc --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{"[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true + }, + "isort.args":["--profile", "black"], +} \ No newline at end of file diff --git a/giskard/scanner/robustness/__init__.py b/giskard/scanner/robustness/__init__.py index 1a890fd5f7..e9b52f775f 100644 --- a/giskard/scanner/robustness/__init__.py +++ b/giskard/scanner/robustness/__init__.py @@ -8,9 +8,15 @@ to the input data that is not supposed to affect the output significantly, and compare the output of the model before and after the transformation. """ -from .base_detector import BaseTextPerturbationDetector, BaseCategorialPertubationDetector +from .base_detector import BaseTextPerturbationDetector from .ethical_bias_detector import EthicalBiasDetector -from .text_perturbation_detector import TextPerturbationDetector from .switch_detector import SwitchAllDetector +from .text_perturbation_detector import TextPerturbationDetector -__all__ = ["EthicalBiasDetector", "TextPerturbationDetector", "BaseTextPerturbationDetector", "BaseCategorialTransformation", "SwitchAllDetector"] +__all__ = [ + "EthicalBiasDetector", + "TextPerturbationDetector", + "BaseTextPerturbationDetector", + "BaseCategorialTransformation", + "SwitchAllDetector", +] diff --git a/giskard/scanner/robustness/base_detector.py b/giskard/scanner/robustness/base_detector.py index 507ca2e375..e458c78b46 100644 --- a/giskard/scanner/robustness/base_detector.py +++ b/giskard/scanner/robustness/base_detector.py @@ -11,8 +11,9 @@ from ..issues import Issue, IssueLevel, Robustness from ..logger import logger from ..registry import Detector -from .text_transformations import TextTransformation from .feature_transformation import CategorialTransformation +from .text_transformations import TextTransformation + class BaseTextPerturbationDetector(Detector): """Base class for metamorphic detectors based on text transformations.""" @@ -218,20 +219,21 @@ def _detect_issues( class BaseCategorialPertubationDetector(Detector): """Base class for metamorphic detectors based on categorial feature""" + _issue_group = Robustness # @TODO : Reserch for the adapted value for the taxonomy. - _taxonomy = None + _taxonomy = None def __init__( - self, - transformations: Optional[Sequence[CategorialTransformation]] = None, - threshold: Optional[float] = None, - output_sensitivity: Optional[float] = None, - num_samples: Optional[int] = None, + self, + transformations: Optional[Sequence[CategorialTransformation]] = None, + threshold: Optional[float] = None, + output_sensitivity: Optional[float] = None, + num_samples: Optional[int] = None, ): """ - Create a new instance of the detector - Parameters + Create a new instance of the detector + Parameters ---------- transformations: Optional[Sequence[CategorialTransformation]] The categorial transformation used in the metamorphic test. If not provided, a default set of transformation will be used. @@ -254,33 +256,30 @@ def __init__( self.output_sensitivity = output_sensitivity def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]) -> Sequence[Issue]: - transformations = self.transformations or self._get_default_transformations(model, dataset) - # Only analyze categorials features - cat_features = [ - f for f in features - if dataset.column_types[f] == "category" - ] + transformations = self.transformations or self._get_default_transformations(model, dataset) + # Only analyze categorials features + cat_features = [f for f in features if dataset.column_types[f] == "category"] logger.info( f"{self.__class__.__name__}: Running with transformations={[t.name for t in transformations]} " f"threshold={self.threshold} output_sensitivity={self.output_sensitivity} num_samples={self.num_samples}" ) - - issues = [] + + issues = [] for transformation in transformations: issues.extend(self._detect_issues(model, dataset, transformation, cat_features)) return [i for i in issues if i] - @abstractmethod def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Sequence[CategorialTransformation]: ... - - def _detect_issues(self, - model: BaseModel, - dataset: Dataset, - transformation: CategorialTransformation, - features: Sequence[Union[str, int]] + + def _detect_issues( + self, + model: BaseModel, + dataset: Dataset, + transformation: CategorialTransformation, + features: Sequence[Union[str, int]], ) -> Sequence[Issue]: num_samples = self.num_samples if self.num_samples is not None else _get_default_num_samples(model) output_sensitivity = ( @@ -291,19 +290,18 @@ def _detect_issues(self, issues = [] # @TODO: integrate this with Giskard metamorphic tests already present for feature in features: - transformation_fn = transformation(column=feature) - transformed = dataset.transform(transformation_fn) + transformation_fn = transformation(column=feature) + transformed = dataset.transform(transformation_fn) changed_idx = dataset.df.index[transformed.df[feature] != dataset.df[feature]] if changed_idx.empty: - continue - + continue + # Select a random subset of the changed records if len(changed_idx) > num_samples: rng = np.random.default_rng(747) changed_idx = changed_idx[rng.choice(len(changed_idx), num_samples, replace=False)] - original_data = Dataset( dataset.df.loc[changed_idx], @@ -321,7 +319,7 @@ def _detect_issues(self, # Calculate predictions original_pred = model.predict(original_data) perturbed_pred = model.predict(perturbed_data) - + if model.is_classification: passed = original_pred.raw_prediction == perturbed_pred.raw_prediction elif model.is_regression: @@ -411,7 +409,7 @@ def _detect_issues(self, issues.append(issue) return issues - + def _generate_robustness_tests(issue: Issue): from ...testing.tests.metamorphic import test_metamorphic_invariance diff --git a/giskard/scanner/robustness/feature_transformation.py b/giskard/scanner/robustness/feature_transformation.py index 346a293aa9..200a51a8be 100644 --- a/giskard/scanner/robustness/feature_transformation.py +++ b/giskard/scanner/robustness/feature_transformation.py @@ -1,6 +1,7 @@ -import pandas as pd -import numpy as np -from typing import Optional, List +from typing import List, Optional + +import numpy as np +import pandas as pd from ..core.core import DatasetProcessFunctionMeta from ..registry.registry import get_object_uuid @@ -11,43 +12,40 @@ class CategorialTransformation(TransformationFunction): name: str def __init__(self, cat_column, needs_dataset=False): - super().__init__(None, row_level=False, cell_level=False, needs_dataset=needs_dataset) + super().__init__(None, row_level=False, cell_level=False, needs_dataset=needs_dataset) self.cat_column = cat_column - self.meta = DatasetProcessFunctionMeta(type="TRANSFORMATION") - self.meta.uuid = get_object_uuid(self) + self.meta = DatasetProcessFunctionMeta(type="TRANSFORMATION") + self.meta.uuid = get_object_uuid(self) self.meta.code = self.name - self.meta.name = self.name + self.meta.name = self.name self.meta.display_name = self.name self.meta.tags = ["pickle", "scan"] self.meta.doc = self.meta.default_doc("Automatically generated transformation function") def __str__(self): return self.name - + def execute(self, data: pd.DataFrame) -> pd.DataFrame: - feature_data = data[self.cat_column] + feature_data = data[self.cat_column] data.loc[feature_data.index, self.column] = feature_data.apply(self.make_perturbation) return data def make_perturbation(self) -> Optional[List[str]]: - raise NotImplementedError() + raise NotImplementedError() class CategorialShuffle(CategorialTransformation): - name = "Shuffle categorial values" + name = "Shuffle categorial values" def __init__(self, cat_column, rng_seed=1729): - super.__init__(cat_column) - self.rng = np.random.default_rng(seed=rng_seed) + super.__init__(cat_column) + self.rng = np.random.default_rng(seed=rng_seed) def execute(self, data: pd.DataFrame): - feature_data = data[self.cat_column] + feature_data = data[self.cat_column] cat_values = list(set(feature_data)) - for i in range(len(cat_values)): - shuffle_cat_value = self.rng.choice(cat_values) + for i in range(len(cat_values)): + shuffle_cat_value = self.rng.choice(cat_values) cat_values[i] = shuffle_cat_value - - return data - - + return data diff --git a/giskard/scanner/robustness/switch_detector.py b/giskard/scanner/robustness/switch_detector.py index e2e5a9259e..9229680207 100644 --- a/giskard/scanner/robustness/switch_detector.py +++ b/giskard/scanner/robustness/switch_detector.py @@ -2,32 +2,31 @@ from giskard.datasets.base import Dataset from giskard.models.base.model import BaseModel -from giskard.scanner.robustness.feature_transformation import CategorialTransformation +from giskard.scanner.robustness.feature_transformation import CategorialTransformation -from .base_detector import BaseCategorialPertubationDetector -from ..issues import Robustness from ..decorators import detector +from ..issues import Robustness +from .base_detector import BaseCategorialPertubationDetector + @detector( name="swtich_all", tags=["switch_all", "robustness", "classification", "regression"], ) class SwitchAllDetector(BaseCategorialPertubationDetector): - """Detect if a pertubation of a single categorial column from the input data can pertub the model. + """Detect if a pertubation of a single categorial column from the input data can pertub the model. By default, we simply perform a shuffle of the data. - + As an example is having a breed category with values potential values: ['Labrador', 'Husky', 'Beagle', ...]. The idea is to switch all Labrador` value to any other breed and so on. """ _issue_group = Robustness # @TODO: find information related to the taxonomy - _taxonomy = None + _taxonomy = None def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Sequence[CategorialTransformation]: - from .feature_transformation import ( - CategorialShuffle - ) + from .feature_transformation import CategorialShuffle - return [CategorialShuffle] \ No newline at end of file + return [CategorialShuffle]