From e458ab3c7b9099f906e31807e409a00dffea5c77 Mon Sep 17 00:00:00 2001 From: RobertSamoilescu Date: Mon, 17 Jan 2022 11:24:44 -0600 Subject: [PATCH] Docstrings updates (#548) * ale docs update * ale some conventions updates * ale single-under * moving back to original return format with inconsistency italic/normal font * first pass through anchor_base * back to standard returns without : * anchor_text and anchor_explanation first pass * first pass through anchor_text * first pass through cfproto and cem * fist pass through cfrl_base * first pass through cfrl_tabular * first pass through shap_wrappers * first pass through models * First pass through backend top * first pass through backends pytorch * first pass through backends tensorflow * small updates * minor correction * example duplicated attribute - TabularSampler * replaced attributes docstrings * second pass through explainers. * added explain fields up to cfproto (inclusive) * add description of explanation return fields * add links to docstrings + minor corrections * tensor to array * fixed duplicated target names * first pass confidence docs * fixed broken links in shap * included links in defaults SHAP * minor fixes * fixed minor indentation and punctuation. private IG build_explanation * Fixed IG and test_shap_wrappers build_explanation * minor updates on defaults, interfaces, autoencoder and anchortabular * fixed approximation_methods * fixed data * fixed discretizer * fix app_methods, distance & distributed * fix language model * fist pass through utils * second pass through utils * fixed mypy errors --- alibi/api/defaults.py | 6 +- alibi/api/interfaces.py | 26 +- alibi/confidence/model_linearity.py | 89 +++-- alibi/confidence/trustscore.py | 24 +- alibi/datasets.py | 25 +- alibi/explainers/ale.py | 81 +++-- alibi/explainers/anchor_base.py | 127 ++++--- alibi/explainers/anchor_explanation.py | 32 +- alibi/explainers/anchor_image.py | 97 +++-- alibi/explainers/anchor_tabular.py | 213 ++++++----- alibi/explainers/anchor_text.py | 297 ++++++++------- alibi/explainers/backends/cfrl_base.py | 10 +- alibi/explainers/backends/cfrl_tabular.py | 78 ++-- .../explainers/backends/pytorch/cfrl_base.py | 51 +-- .../backends/pytorch/cfrl_tabular.py | 14 +- .../backends/tensorflow/cfrl_base.py | 67 ++-- .../backends/tensorflow/cfrl_tabular.py | 15 +- alibi/explainers/cem.py | 98 ++--- alibi/explainers/cfproto.py | 139 +++---- alibi/explainers/cfrl_base.py | 231 ++++++------ alibi/explainers/cfrl_tabular.py | 83 +++-- alibi/explainers/counterfactual.py | 75 ++-- alibi/explainers/integrated_gradients.py | 139 +++---- alibi/explainers/shap_wrappers.py | 255 ++++++------- alibi/explainers/tests/test_shap_wrappers.py | 10 +- alibi/models/pytorch/actor_critic.py | 24 ++ alibi/models/pytorch/autoencoder.py | 35 +- alibi/models/pytorch/cfrl_models.py | 61 ++++ alibi/models/pytorch/metrics.py | 8 +- alibi/models/pytorch/model.py | 12 +- alibi/models/tensorflow/actor_critic.py | 28 ++ alibi/models/tensorflow/autoencoder.py | 45 ++- alibi/models/tensorflow/cfrl_models.py | 73 ++++ alibi/utils/approximation_methods.py | 59 +-- alibi/utils/data.py | 8 +- alibi/utils/discretizer.py | 19 +- alibi/utils/distance.py | 21 +- alibi/utils/distributed.py | 339 ++++++++++-------- alibi/utils/distributions.py | 4 +- alibi/utils/download.py | 4 +- alibi/utils/gradients.py | 18 +- alibi/utils/lang_model.py | 53 +-- alibi/utils/mapping.py | 10 +- alibi/utils/visualization.py | 174 ++++----- alibi/utils/wrappers.py | 8 +- doc/source/methods/ALE.ipynb | 14 +- 46 files 
changed, 1894 insertions(+), 1405 deletions(-) diff --git a/alibi/api/defaults.py b/alibi/api/defaults.py index b2bb021ce..24aa8c392 100644 --- a/alibi/api/defaults.py +++ b/alibi/api/defaults.py @@ -104,7 +104,8 @@ 'kwargs', ] """ -KernelShap parameters updated and return in metadata['params']. +KernelShap parameters updated and returned in ``metadata['params']``. +See :py:class:`alibi.explainers.shap_wrappers.KernelShap`. """ DEFAULT_META_KERNEL_SHAP = { @@ -172,7 +173,8 @@ 'kwargs' ] """ -TreeShap parameters updated and return in metadata['params']. +TreeShap parameters updated and returned in ``metadata['params']``. +See :py:class:`alibi.explainers.shap_wrappers.TreeShap`. """ DEFAULT_META_TREE_SHAP = { diff --git a/alibi/api/interfaces.py b/alibi/api/interfaces.py index e78e169dd..d872f4728 100644 --- a/alibi/api/interfaces.py +++ b/alibi/api/interfaces.py @@ -68,7 +68,7 @@ class Explainer(abc.ABC): """ Base class for explainer algorithms """ - meta = attr.ib(default=attr.Factory(default_meta), repr=alibi_pformat) # type: dict + meta: dict = attr.ib(default=attr.Factory(default_meta), repr=alibi_pformat) #: Explainer meta-data. def __attrs_post_init__(self): # add a name and version to the metadata dictionary @@ -102,6 +102,14 @@ def load(cls, path: Union[str, os.PathLike], predictor: Any) -> "Explainer": return load_explainer(path, predictor) def reset_predictor(self, predictor: Any) -> None: + """ + Resets the predictor. + + Parameters + ---------- + predictor + New predictor. + """ raise NotImplementedError def save(self, path: Union[str, os.PathLike]) -> None: @@ -118,14 +126,14 @@ def save(self, path: Union[str, os.PathLike]) -> None: def _update_metadata(self, data_dict: dict, params: bool = False) -> None: """ Updates the metadata of the explainer using the data from the `data_dict`. If the params option - is specified, then each key-value pair is added to the metadata `'params'` dictionary. + is specified, then each key-value pair is added to the metadata ``'params'`` dictionary. Parameters ---------- data_dict Contains the data to be stored in the metadata. params - If True, the method updates the `'params'` attribute of the metatadata. + If ``True``, the method updates the ``'params'`` attribute of the metadata. """ if params: @@ -151,34 +159,34 @@ class Explanation: def __attrs_post_init__(self): """ - Expose keys stored in self.meta and self.data as attributes of the class. + Expose keys stored in `self.meta` and `self.data` as attributes of the class. """ for key, value in ChainMap(self.meta, self.data).items(): setattr(self, key, value) def to_json(self) -> str: """ - Serialize the explanation data and metadata into a json format. + Serialize the explanation data and metadata into a `json` format. Returns ------- - String containing json representation of the explanation + String containing `json` representation of the explanation. """ return json.dumps(attr.asdict(self), cls=NumpyEncoder) @classmethod def from_json(cls, jsonrepr) -> "Explanation": """ - Create an instance of an Explanation class using a json representation of the Explanation. + Create an instance of an `Explanation` class using a `json` representation of the `Explanation`. Parameters ---------- jsonrepr - json representation of an explanation + `json` representation of an explanation. Returns ------- - An Explanation object + An Explanation object. 
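A minimal round-trip sketch of the `to_json` / `from_json` pair documented above; the toy `meta` and `data` contents are made up purely for illustration and are not part of this patch:

    from alibi.api.interfaces import Explanation

    exp = Explanation(meta={'name': 'ToyExplainer', 'params': {}},
                      data={'anchor': ['A = 1'], 'precision': 0.95})
    json_repr = exp.to_json()                # serialize metadata and data to a JSON string
    exp2 = Explanation.from_json(json_repr)  # rebuild an Explanation from that string
    assert exp2.meta == exp.meta and exp2.data == exp.data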
""" dictrepr = json.loads(jsonrepr) try: diff --git a/alibi/confidence/model_linearity.py b/alibi/confidence/model_linearity.py index 695c51849..c45a8e9a2 100644 --- a/alibi/confidence/model_linearity.py +++ b/alibi/confidence/model_linearity.py @@ -16,15 +16,15 @@ def _linear_superposition(alphas, vecs, shape): Parameters ---------- alphas - Coefficients of the superposition + Coefficients of the superposition. vecs - Tensors of the superposition + Tensors of the superposition. shape - Shape of each tensor + Shape of each tensor. Returns ------- - Linear tensor superposition + Linear tensor superposition. """ input_str = string.ascii_lowercase[2: 2 + len(shape)] einstr = 'a,ba{}->b{}'.format(input_str, input_str) @@ -39,19 +39,19 @@ def _calculate_global_linearity(predict_fn: Callable, input_shape: Tuple, X_samp Parameters ---------- predict_fn - Model prediction function + Model prediction function. input_shape - Shape of the input + Shape of the input. X_samples - Array of feature vectors in the linear superposition + Array of feature vectors in the linear superposition. model_type - 'classifier' or 'regressor' + Supported values: ``'classifier'`` | ``'regressor'``. alphas - Array of coefficients in the linear superposition + Array of coefficients in the linear superposition. Returns ------- - Linearity score + Linearity score. """ ss = X_samples.shape[:2] # X_samples shape=(nb_instances, nb_samples, nb_features) @@ -105,27 +105,27 @@ def _calculate_global_linearity(predict_fn: Callable, input_shape: Tuple, X_samp def _calculate_pairwise_linearity(predict_fn: Callable, x: np.ndarray, input_shape: Tuple, X_samples: np.ndarray, model_type: str, alphas: np.ndarray) -> np.ndarray: - """Calculates the norm of the difference between the output of a linear superposition of a test vector x and - vectors in X_samples and the linear superposition of the outputs, averaged over all the vectors in X_samples. + """Calculates the norm of the difference between the output of a linear superposition of a test vector `x` and + vectors in `X_samples` and the linear superposition of the outputs, averaged over all the vectors in `X_samples`. Parameters ---------- predict_fn - Model prediction function + Model prediction function. x - Test instance for which to calculate the linearity measure + Test instance for which to calculate the linearity measure. input_shape - Shape of the input + Shape of the input. X_samples - Array of feature vectors in the linear superposition + Array of feature vectors in the linear superposition. model_type - 'classifier' or 'regressor' + Supported values: ``'classifier'`` | ``'regressor'``. alphas - Array of coefficients in the linear superposition + Array of coefficients in the linear superposition. Returns ------- - Linearity score + Linearity score. """ ss = X_samples.shape[:2] # X_samples shape=(nb_instances, nb_samples, nb_features) @@ -190,7 +190,7 @@ def _sample_knn(x: np.ndarray, X_train: np.ndarray, nb_samples: int = 10) -> np. Parameters ---------- x - Central instance for sampling + Central instance for sampling. X_train Training set. nb_samples @@ -198,7 +198,7 @@ def _sample_knn(x: np.ndarray, X_train: np.ndarray, nb_samples: int = 10) -> np. Returns ------- - Sampled vectors + Sampled vectors. """ x = x.reshape(x.shape[0], -1) @@ -221,15 +221,15 @@ def _sample_knn(x: np.ndarray, X_train: np.ndarray, nb_samples: int = 10) -> np. 
def _sample_grid(x: np.ndarray, feature_range: np.ndarray, epsilon: float = 0.04, nb_samples: int = 10, res: int = 100) -> np.ndarray: - """Samples data points uniformly from an interval centered at x and with size epsilon * Delta, - with delta = f_max - f_min the features ranges. + """Samples data points uniformly from an interval centered at `x` and with size `epsilon * delta`, + with `delta = f_max - f_min` the features ranges. Parameters ---------- x Instance of interest. feature_range - Array with min and max values for each feature + Array with min and max values for each feature. epsilon Size of the sampling region around central instance as percentage of features range. nb_samples @@ -237,7 +237,7 @@ def _sample_grid(x: np.ndarray, feature_range: np.ndarray, epsilon: float = 0.04 Returns ------- - Sampled vectors + Sampled vectors. """ nb_instances = x.shape[0] @@ -271,7 +271,7 @@ def _linearity_measure(predict_fn: Callable, alphas: Optional[np.ndarray] = None, model_type: str = 'classifier', agg: str = 'global') -> np.ndarray: - """Calculate the linearity measure of the model around an instance of interest x. + """Calculate the linearity measure of the model around an instance of interest `x`. Parameters ---------- @@ -284,7 +284,7 @@ def _linearity_measure(predict_fn: Callable, feature_range Array with min and max values for each feature. method - Method for sampling. Supported values 'knn' or 'grid'. + Method for sampling. Supported values: ``'knn'`` | ``'grid'``. epsilon Size of the sampling region around the central instance as a percentage of feature range. nb_samples @@ -294,13 +294,13 @@ def _linearity_measure(predict_fn: Callable, alphas Array of coefficients in the superposition. model_type - Type of task. Supported values are 'regressor' or 'classifier'. + Type of task. Supported values: ``'regressor'`` | ``'classifier'``. agg - Aggregation method. Supported values are 'global' or 'pairwise'. + Aggregation method. Supported values: ``'global'`` | ``'pairwise'``. Returns ------- - Linearity score + Linearity score. """ input_shape = x.shape[1:] @@ -339,11 +339,11 @@ def _infer_feature_range(X_train: np.ndarray) -> np.ndarray: Parameters ---------- X_train - Training set + Training set. Returns ------- - Feature range + Feature range. """ X_train = X_train.reshape(X_train.shape[0], -1) return np.vstack((X_train.min(axis=0), X_train.max(axis=0))).T @@ -365,7 +365,7 @@ def __init__(self, Parameters ---------- method - Method for sampling. Supported methods are 'knn' or 'grid'. + Method for sampling. Supported methods: ``'knn'`` | ``'grid'``. epsilon Size of the sampling region around the central instance as a percentage of the features range. nb_samples @@ -375,9 +375,9 @@ def __init__(self, alphas Coefficients in the superposition. agg - Aggregation method. Supported values are 'global' or 'pairwise'. + Aggregation method. Supported values: ``'global'`` | ``'pairwise'``. model_type - Type of task. Supported values are 'regressor' or 'classifier'. + Type of task. Supported values: ``'regressor'`` | ``'classifier'``. """ self.method = method self.epsilon = epsilon @@ -395,11 +395,8 @@ def fit(self, X_train: np.ndarray) -> None: Parameters ---------- X_train - Training set + Training set. - Returns - ------- - None """ self.X_train = X_train self.feature_range = _infer_feature_range(X_train) @@ -412,13 +409,13 @@ def score(self, predict_fn: Callable, x: np.ndarray) -> np.ndarray: Parameters ---------- predict_fn - Prediction function + Prediction function. 
x - Instance of interest + Instance of interest. Returns ------- - Linearity measure + Linearity measure. """ input_shape = x.shape[1:] @@ -466,7 +463,7 @@ def linearity_measure(predict_fn: Callable, feature_range Array with min and max values for each feature. method - Method for sampling. Supported values 'knn' or 'grid'. + Method for sampling. Supported values: ``'knn'`` | ``'grid'``. X_train Training set. epsilon @@ -478,13 +475,13 @@ def linearity_measure(predict_fn: Callable, alphas Coefficients in the superposition. agg - Aggregation method. Supported values 'global' or 'pairwise'. + Aggregation method. Supported values: ``'global'`` | ``'pairwise'``. model_type - Type of task. Supported values 'regressor' or 'classifier'. + Type of task. Supported values: ``'regressor'`` | ``'classifier'``. Returns ------- - Linearity measure + Linearity measure. """ if method == 'knn': diff --git a/alibi/confidence/trustscore.py b/alibi/confidence/trustscore.py index a733361e6..0b1d3754c 100644 --- a/alibi/confidence/trustscore.py +++ b/alibi/confidence/trustscore.py @@ -26,15 +26,15 @@ def __init__(self, alpha Fraction of instances to filter out to reduce impact of outliers. filter_type - Filter method; either 'distance_knn' or 'probability_knn' + Filter method: ``'distance_knn'`` | ``'probability_knn'``. leaf_size Number of points at which to switch to brute-force. Affects speed and memory required to build trees. - Memory to store the tree scales with n_samples / leaf_size. + Memory to store the tree scales with `n_samples / leaf_size`. metric - Distance metric used for the tree. See sklearn's DistanceMetric class for a list of available metrics. + Distance metric used for the tree. See `sklearn` DistanceMetric class for a list of available metrics. dist_filter_type - Use either the distance to the k-nearest point (dist_filter_type = 'point') or - the average distance from the first to the k-nearest point in the data (dist_filter_type = 'mean'). + Use either the distance to the k-nearest point (``dist_filter_type = 'point'``) or + the average distance from the first to the k-nearest point in the data (``dist_filter_type = 'mean'``). """ self.k_filter = k_filter self.alpha = alpha @@ -52,7 +52,7 @@ def filter_by_distance_knn(self, X: np.ndarray) -> np.ndarray: Parameters ---------- X - Data + Data. Returns ------- @@ -75,9 +75,9 @@ def filter_by_probability_knn(self, X: np.ndarray, Y: np.ndarray) -> Tuple[np.nd Parameters ---------- X - Data + Data. Y - Predicted class labels + Predicted class labels. Returns ------- @@ -105,11 +105,11 @@ def fit(self, X: np.ndarray, Y: np.ndarray, classes: Optional[int] = None) -> No Parameters ---------- X - Data + Data. Y Target labels, either one-hot encoded or the actual class label. classes - Number of prediction classes, needs to be provided if Y equals the predicted class. + Number of prediction classes, needs to be provided if `Y` equals the predicted class. """ self.classes = classes if classes is not None else Y.shape[1] self.kdtrees = [None] * self.classes # type: Any @@ -161,8 +161,8 @@ def score(self, X: np.ndarray, Y: np.ndarray, k: int = 2, dist_type: str = 'poin k Number of nearest neighbors used for distance calculation. dist_type - Use either the distance to the k-nearest point (dist_type = 'point') or - the average distance from the first to the k-nearest point in the data (dist_type = 'mean'). 
+ Use either the distance to the k-nearest point (``dist_type = 'point'``) or + the average distance from the first to the k-nearest point in the data (``dist_type = 'mean'``). Returns ------- diff --git a/alibi/datasets.py b/alibi/datasets.py index b16ddb5d2..121327754 100644 --- a/alibi/datasets.py +++ b/alibi/datasets.py @@ -39,9 +39,9 @@ def load_cats(target_size: tuple = (299, 299), return_X_y: bool = False) -> Unio Parameters ---------- target_size - Size of the returned images, used to crop images for a specified model input size + Size of the returned images, used to crop images for a specified model input size. return_X_y - If true, return features X and labels y as numpy arrays, if False return a Bunch object + If ``True``, return features `X` and labels `y` as `numpy` arrays. If ``False`` return a `Bunch` object Returns ------- @@ -49,7 +49,7 @@ def load_cats(target_size: tuple = (299, 299), return_X_y: bool = False) -> Unio Bunch object with fields 'data', 'target' and 'target_names'. Both `targets` and `target_names` are taken from the original Imagenet. (data, target) - Tuple if ``return_X_y`` is true + Tuple if ``return_X_y=True``. """ tar = tarfile.open(fileobj=BytesIO(pkgutil.get_data(__name__, "data/cats.tar.gz")), # type: ignore[arg-type] mode='r:gz') @@ -92,7 +92,7 @@ def fetch_movie_sentiment(return_X_y: bool = False, url_id: int = 0) -> Union[Bu Parameters ---------- return_X_y - If true, return features X and labels y as Python lists, if False return a Bunch object + If ``True``, return features `X` and labels `y` as `Python` lists. If ``False`` return a `Bunch` object. url_id Index specifying which URL to use for downloading @@ -101,7 +101,7 @@ def fetch_movie_sentiment(return_X_y: bool = False, url_id: int = 0) -> Union[Bu Bunch Movie reviews and sentiment labels (0 means 'negative' and 1 means 'positive'). (data, target) - Tuple if ``return_X_y`` is true + Tuple if ``return_X_y=True``. """ url = MOVIESENTIMENT_URLS[url_id] try: @@ -140,11 +140,11 @@ def fetch_adult(features_drop: Optional[list] = None, return_X_y: bool = False, Parameters ---------- features_drop - List of features to be dropped from dataset, by default drops ["fnlwgt", "Education-Num"] + List of features to be dropped from dataset, by default drops ``["fnlwgt", "Education-Num"]``. return_X_y - If true, return features X and labels y as numpy arrays, if False return a Bunch object + If ``True``, return features `X` and labels `y` as `numpy` arrays. If ``False`` return a `Bunch` object. url_id - Index specifying which URL to use for downloading + Index specifying which URL to use for downloading. Returns ------- @@ -152,7 +152,7 @@ def fetch_adult(features_drop: Optional[list] = None, return_X_y: bool = False, Dataset, labels, a list of features and a dictionary containing a list with the potential categories for each categorical feature where the key refers to the feature column. (data, target) - Tuple if ``return_X_y`` is true + Tuple if ``return_X_y=True`` """ if features_drop is None: features_drop = ["fnlwgt", "Education-Num"] @@ -251,19 +251,20 @@ def fetch_adult(features_drop: Optional[list] = None, return_X_y: bool = False, return Bunch(data=data, target=labels, feature_names=features, target_names=target_names, category_map=category_map) -def fetch_fashion_mnist(return_X_y: bool = False): +def fetch_fashion_mnist(return_X_y: bool = False + ) -> Union[Bunch, Tuple[np.ndarray, np.ndarray]]: """ Loads the Fashion MNIST dataset. 
Parameters ---------- return_X_y: - If True, an NxMxP array of data points and N-array of labels are returned + If ``True``, an `N x M x P` array of data points and `N`-array of labels are returned instead of a dict. Returns ------- - If return_X_y is False, a Bunch object with fields 'data', 'targets' and 'target_names' + If ``return_X_y=False``, a Bunch object with fields 'data', 'targets' and 'target_names' is returned. Otherwise an array with data points and an array of labels is returned. """ diff --git a/alibi/explainers/ale.py b/alibi/explainers/ale.py index 5cfc99de0..7c86db983 100644 --- a/alibi/explainers/ale.py +++ b/alibi/explainers/ale.py @@ -38,39 +38,39 @@ def __init__(self, Parameters ---------- predictor - A callable that takes in an NxF array as input and outputs an NxT array (N - number of - data points, F - number of features, T - number of outputs/targets (e.g. 1 for single output - regression, >=2 for classification). + A callable that takes in an `N x F` array as input and outputs an `N x T` array (`N` - number of + data points, `F` - number of features, `T` - number of outputs/targets (e.g. 1 for single output + regression, >=2 for classification)). feature_names A list of feature names used for displaying results. target_names A list of target/output names used for displaying results. check_feature_resolution - If true, the number of unique values is calculated for each feature and if it is less than - `low_resolution_threshold` then the feature values are used for gridpoints instead of quantiles. + If ``True``, the number of unique values is calculated for each feature and if it is less than + `low_resolution_threshold` then the feature values are used for grid-points instead of quantiles. This may increase the runtime of the algorithm for large datasets. low_resolution_threshold If a feature has at most this many unique values, these are used as the grid points instead of quantiles. This is to avoid situations when the quantile algorithm returns quantiles between discrete values which can result in jumps in the ALE plot obscuring the true effect. Only used if - `check_feature_resolution` is True. + `check_feature_resolution` is ``True``. extrapolate_constant If a feature is constant, only one quantile exists where all the data points lie. In this case the - ALE value at that poiny is zero, however this may be misleading if the feature does have an effect on - the model. If this parameter is set to `True`, the ALE values are calculated on an interval surrounding + ALE value at that point is zero, however this may be misleading if the feature does have an effect on + the model. If this parameter is set to ``True``, the ALE values are calculated on an interval surrounding the constant value. The interval length is controlled by the `extrapolate_constant_perc` and `extrapolate_constant_min` arguments. extrapolate_constant_perc Percentage by which to extrapolate a constant feature value to create an interval for ALE calculation. If `q` is the constant feature value, creates an interval `[q - q/extrapolate_constant_perc, q + q/extrapolate_constant_perc]` for which ALE is calculated. - Only relevant if `extrapolate_constant` is set to `True`. + Only relevant if `extrapolate_constant` is set to ``True``. extrapolate_constant_min Controls the minimum extrapolation length for constant features. An interval constructed for constant - features is guaranteed to be 2*extrapolate_constant_min wide centered on the feature value. 
This allows + features is guaranteed to be `2 x extrapolate_constant_min` wide centered on the feature value. This allows for capturing model behaviour around constant features which have small value so that `extrapolate_constant_perc` is not so helpful. - Only relevant if `extrapolate_constant` is set to `True`. + Only relevant if `extrapolate_constant` is set to ``True``. """ super().__init__(meta=copy.deepcopy(DEFAULT_META_ALE)) @@ -96,7 +96,7 @@ def explain(self, X: np.ndarray, features: Optional[List[int]] = None, min_bin_p Parameters ---------- X - An NxF tabular dataset used to calculate the ALE curves. This is typically the training dataset + An `N x F` tabular dataset used to calculate the ALE curves. This is typically the training dataset or a representative sample. features: Features for which to calculate ALE. @@ -106,8 +106,12 @@ def explain(self, X: np.ndarray, features: Optional[List[int]] = None, min_bin_p Returns ------- + explanation An `Explanation` object containing the data and the metadata of the calculated ALE curves. + See usage at `ALE examples`_ for details. + .. _ALE examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/ALE.html """ self.meta['params'].update(min_bin_points=min_bin_points) @@ -162,7 +166,7 @@ def explain(self, X: np.ndarray, features: Optional[List[int]] = None, min_bin_p # I've replaced this with feature deciles which is coarser but has constant space complexity # as opposed to a rugplot. Alternatively, could consider subsampling to produce a rug with some # maximum number of points. - return self.build_explanation( + return self._build_explanation( ale_values=ale_values, ale0=ale0, constant_value=constant_value, @@ -171,13 +175,13 @@ def explain(self, X: np.ndarray, features: Optional[List[int]] = None, min_bin_p feature_names=feature_names ) - def build_explanation(self, - ale_values: List[np.ndarray], - ale0: List[np.ndarray], - constant_value: float, - feature_values: List[np.ndarray], - feature_deciles: List[np.ndarray], - feature_names: np.ndarray) -> Explanation: + def _build_explanation(self, + ale_values: List[np.ndarray], + ale0: List[np.ndarray], + constant_value: float, + feature_values: List[np.ndarray], + feature_deciles: List[np.ndarray], + feature_names: np.ndarray) -> Explanation: """ Helper method to build the Explanation object. """ @@ -199,6 +203,14 @@ def build_explanation(self, return Explanation(meta=copy.deepcopy(self.meta), data=data) def reset_predictor(self, predictor: Callable) -> None: + """ + Resets the predictor function. + + Parameters + ---------- + predictor + New predictor function. + """ self.predictor = predictor @@ -227,14 +239,14 @@ def bisect_fun(fun: Callable, target: float, lo: int, hi: int) -> int: """ Bisection algorithm for function evaluation with integer support. - Assumes the function is non-decreasing on the interval [lo, hi]. - Return an integer value v such that for all x=v fun(x)>=target. + Assumes the function is non-decreasing on the interval `[lo, hi]`. + Return an integer value v such that for all `x=v, fun(x)>=target`. This is equivalent to the library function `bisect.bisect_left` but for functions defined on integers. Parameters ---------- fun - A function defined on integers in the range [lo, hi] and returning floats. + A function defined on integers in the range `[lo, hi]` and returning floats. target Target value to be searched for. 
lo @@ -258,7 +270,7 @@ def bisect_fun(fun: Callable, target: float, lo: int, hi: int) -> int: def minimum_satisfied(values: np.ndarray, min_bin_points: int, n: int) -> int: """ - Calculates whether the partition into bins induced by n quantiles + Calculates whether the partition into bins induced by `n` quantiles has the minimum number of points in each resulting bin. Parameters @@ -272,7 +284,8 @@ def minimum_satisfied(values: np.ndarray, min_bin_points: int, n: int) -> int: Returns ------- - Integer encoded boolean with 1 - each bin has at least `min_bin_points` and 0 otherwise. + Integer encoded boolean with 1 - each bin has at least `min_bin_points` and 0 otherwise. + """ q = np.unique(get_quantiles(values, num_quantiles=n)) indices = np.searchsorted(q, values, side='left') @@ -365,7 +378,7 @@ def ale_num( q Array of quantiles of the input values. ale - ALE values for each feature at each of the points in q. + ALE values for each feature at each of the points in `q`. ale0 The constant offset used to center the ALE curves. @@ -454,25 +467,25 @@ def plot_ale(exp: Explanation, Parameters ---------- exp - An `Explanation` object produced by a call to the `ALE.explain` method. + An `Explanation` object produced by a call to the :py:meth:`alibi.explainers.ale.ALE.explain` method. features - A list of features for which to plot the ALE curves or `all` for all features. + A list of features for which to plot the ALE curves or ``'all'`` for all features. Can be a mix of integers denoting feature index or strings denoting entries in - `exp.feature_names`. Defaults to 'all'. + `exp.feature_names`. Defaults to ``'all'``. targets - A list of targets for which to plot the ALE curves or `all` for all targets. + A list of targets for which to plot the ALE curves or ``'all'`` for all targets. Can be a mix of integers denoting target index or strings denoting entries in - `exp.target_names`. Defaults to 'all'. + `exp.target_names`. Defaults to ``'all'``. n_cols Number of columns to organize the resulting plot into. sharey A parameter specifying whether the y-axis of the ALE curves should be on the same scale - for several features. Possible values are `all`, `row`, `None`. + for several features. Possible values are: ``'all'`` | ``'row'`` | ``None``. constant A parameter specifying whether the constant zeroth order effects should be added to the ALE first order effects. ax - A `matplotlib` axes object or a numpy array of `matplotlib` axes to plot on. + A `matplotlib` axes object or a `numpy` array of `matplotlib` axes to plot on. line_kw Keyword arguments passed to the `plt.plot` function. fig_kw @@ -480,7 +493,7 @@ def plot_ale(exp: Explanation, Returns ------- - An array of matplotlib axes with the resulting ALE plots. + An array of `matplotlib` axes with the resulting ALE plots. """ import matplotlib.pyplot as plt diff --git a/alibi/explainers/anchor_base.py b/alibi/explainers/anchor_base.py index 9b51b9f56..dd71a5132 100644 --- a/alibi/explainers/anchor_base.py +++ b/alibi/explainers/anchor_base.py @@ -21,7 +21,7 @@ def __init__(self, samplers: List[Callable], **kwargs) -> None: Parameters --------- samplers - Objects that can be called with args (result, n_samples) tuple to draw samples. + Objects that can be called with args (`result`, `n_samples`) tuple to draw samples. """ self.sample_fcn = samplers[0] @@ -40,11 +40,9 @@ def _init_state(self, batch_size: int, coverage_data: np.ndarray) -> None: Parameters ---------- batch_size - See anchor_beam method. 
+ See :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam.anchor_beam` method. coverage_data - See _get_coverage_samples method. - sample_cache_size - See anchor_beam method. + See :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam._get_coverage_samples` method. """ prealloc_size = batch_size * self.sample_cache_size @@ -78,11 +76,11 @@ def _sort(x: tuple, allow_duplicates=False) -> tuple: x: Tuple to be sorted. allow_duplicates: - If True, duplicate entries are kept + If ``True``, duplicate entries are kept. Returns ------- - A sorted tuple. + A sorted tuple. """ if allow_duplicates: @@ -100,13 +98,13 @@ def dup_bernoulli(p: np.ndarray, level: np.ndarray, n_iter: int = 17) -> np.ndar p Precision of candidate anchors. level - beta / nb of samples for each result. + `beta / nb of samples` for each result. n_iter Number of iterations during lower bound update. Returns ------- - Updated upper precision bounds array. + Updated upper precision bounds array. """ # TODO: where does 17x sampling come from? lm = p.copy() @@ -132,13 +130,13 @@ def dlow_bernoulli(p: np.ndarray, level: np.ndarray, n_iter: int = 17) -> np.nda p Precision of candidate anchors. level - beta / nb of samples for each result. + `beta / nb of samples` for each result. n_iter Number of iterations during lower bound update. Returns ------- - Updated lower precision bounds array. + Updated lower precision bounds array. """ um = p.copy() @@ -164,10 +162,11 @@ def compute_beta(n_features: int, t: int, delta: float) -> float: t Iteration number. delta + Confidence budget, candidate anchors have close to optimal precisions with prob. `1 - delta`. Returns ------- - Level used to update upper and lower precision bounds. + Level used to update upper and lower precision bounds. """ # TODO: where do magic numbers come from? alpha = 1.1 @@ -183,14 +182,14 @@ def _get_coverage_samples(self, coverage_samples: int, samplers: Optional[List[C Parameters --------- coverage_samples - See anchor_beam method. + See :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam.anchor_beam` method. samplers - See __init__ method. + See :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam.__init__` method. Returns ------- coverage_data - binarised samples, where 1 indicates the feature has same value/is in same beam as + Binarised samples, where 1 indicates the feature has same value/is in same beam as instance to be explained. Used to determine, e.g., which samples an result applies to. """ @@ -201,7 +200,7 @@ def _get_coverage_samples(self, coverage_samples: int, samplers: Optional[List[C def select_critical_arms(self, means: np.ndarray, ub: np.ndarray, lb: np.ndarray, n_samples: np.ndarray, delta: float, top_n: int, t: int): """ - Determines a set of two anchors by updating the upper bound for low emprical precision anchors and + Determines a set of two anchors by updating the upper bound for low empirical precision anchors and the lower bound for anchors with high empirical precision. Parameters @@ -215,7 +214,7 @@ def select_critical_arms(self, means: np.ndarray, ub: np.ndarray, lb: np.ndarray n_samples The number of samples drawn for each candidate result. delta - Confidence budget, candidate anchors have close to optimal precisions with prob. 1 - delta. + Confidence budget, candidate anchors have close to optimal precisions with prob. `1 - delta`. top_n Number of arms to be selected. t @@ -264,7 +263,7 @@ def kllucb(self, anchors: list, init_stats: dict, epsilon: float, delta: float, epsilon Precision bound tolerance for convergence. 
delta - Used to compute beta. + Used to compute `beta`. batch_size Number of samples. top_n @@ -272,11 +271,11 @@ def kllucb(self, anchors: list, init_stats: dict, epsilon: float, delta: float, verbose Whether to print intermediate output. verbose_every - Whether to print intermediate output every verbose_every steps. + Whether to print intermediate output every `verbose_every` steps. Returns ------- - Indices of best result options. Number of indices equals min of beam width or nb of candidate anchors. + Indices of best result options. Number of indices equals min of beam width or nb of candidate anchors. """ # n_features equals to the nb of candidate anchors @@ -337,15 +336,15 @@ def draw_samples(self, anchors: list, batch_size: int) -> Tuple[tuple, tuple]: """ Parameters ---------- - anchors - Anchors on which samples are conditioned. - batch_size - The number of samples drawn for each result. + anchors + Anchors on which samples are conditioned. + batch_size + The number of samples drawn for each result. Returns ------- - A tuple of positive samples (for which prediction matches desired label) - and a tuple of total number of samples drawn. + A tuple of positive samples (for which prediction matches desired label) and a tuple of \ + total number of samples drawn. """ for anchor in anchors: @@ -369,10 +368,9 @@ def propose_anchors(self, previous_best: list) -> list: previous_best List with tuples of result candidates. - Returns ------- - List with tuples of candidate anchors with additional metadata. + List with tuples of candidate anchors with additional metadata. """ # compute some variables used later on @@ -424,11 +422,11 @@ def propose_anchors(self, previous_best: list) -> list: def update_state(self, covered_true: np.ndarray, covered_false: np.ndarray, labels: np.ndarray, samples: Tuple[np.ndarray, float], anchor: tuple) -> Tuple[int, int]: """ - Updates the explainer state (see __init__ for full state definition). + Updates the explainer state (see :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam.__init__` + for full state definition). Parameters ---------- - covered_true Examples where the result applies and the prediction is the same as on the instance to be explained. @@ -445,8 +443,8 @@ def update_state(self, covered_true: np.ndarray, covered_false: np.ndarray, labe Returns ------- - A tuple containing the number of instances equals desired label of observation - to be explained the total number of instances sampled, and the result that was sampled + A tuple containing the number of instances equals desired label of observation \ + to be explained the total number of instances sampled, and the result that was sampled. """ # data = binary matrix where 1 means a feature has the same value as the feature in the result @@ -487,12 +485,11 @@ def get_init_stats(self, anchors: list, coverages=False) -> dict: anchors Candidate anchors. coverages - If True, the statistics returned contain the coverage of the specified anchors. + If ``True``, the statistics returned contain the coverage of the specified anchors. Returns ------- - Dictionary with lists containing nb of samples used and where sample predictions equal - the desired label. + Dictionary with lists containing nb of samples used and where sample predictions equal the desired label. 
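The `dup_bernoulli`/`dlow_bernoulli` updates driven by `kllucb` above tighten the upper/lower precision bounds using the KL divergence between Bernoulli distributions. A rough sketch of the upper-bound idea only (a plain bisection under that assumption; the library's implementation differs in detail):

    import numpy as np

    def bernoulli_kl(p, q, eps=1e-12):
        # KL(Ber(p) || Ber(q)), clipped away from 0 and 1 for numerical stability
        p, q = np.clip(p, eps, 1 - eps), np.clip(q, eps, 1 - eps)
        return p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))

    def upper_bound(p, level, n_iter=17):
        # largest q >= p with KL(Ber(p) || Ber(q)) <= level, found by bisection
        lo, hi = p, 1.0
        for _ in range(n_iter):
            mid = (lo + hi) / 2.0
            if bernoulli_kl(p, mid) <= level:
                lo = mid   # mid still satisfies the bound, search higher
            else:
                hi = mid   # bound violated, search lower
        return lo

    print(upper_bound(0.9, 0.05))  # upper confidence bound for an empirical precision of 0.9

The lower bound mirrors this with the search interval `[0, p]`.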
""" def array_factory(size: tuple): @@ -512,8 +509,8 @@ def get_anchor_metadata(self, features: tuple, success, batch_size: int = 100) - """ Given the features contained in a result, it retrieves metadata such as the precision and coverage of the result and partial anchors and examples where the result/partial anchors - apply and yield the same prediction as on the instance to be explained (covered_true) - or a different prediction (covered_false). + apply and yield the same prediction as on the instance to be explained (`covered_true`) + or a different prediction (`covered_false`). Parameters ---------- @@ -527,8 +524,7 @@ def get_anchor_metadata(self, features: tuple, success, batch_size: int = 100) - Returns ------- - Anchor dictionary with result features and additional metadata. - :param success: + Anchor dictionary with result features and additional metadata. """ state = self.state @@ -585,25 +581,25 @@ def get_anchor_metadata(self, features: tuple, success, batch_size: int = 100) - def to_sample(means: np.ndarray, ubs: np.ndarray, lbs: np.ndarray, desired_confidence: float, epsilon_stop: float): """ Given an array of mean result precisions and their upper and lower bounds, determines for which anchors - more samples need to be drawn in order to estimate the anchors precision with desired_confidence and error + more samples need to be drawn in order to estimate the anchors precision with `desired_confidence` and error tolerance. Parameters ---------- - means: - Mean precisions (each element represents a different result). - ubs: - Precisions' upper bounds (each element represents a different result). - lbs: - Precisions' lower bounds (each element represents a different result). - desired_confidence: - Desired level of confidence for precision estimation. - epsilon_stop: - Tolerance around desired precision. + means: + Mean precisions (each element represents a different result). + ubs: + Precisions' upper bounds (each element represents a different result). + lbs: + Precisions' lower bounds (each element represents a different result). + desired_confidence: + Desired level of confidence for precision estimation. + epsilon_stop: + Tolerance around desired precision. Returns ------- - Boolean array indicating whether more samples are to be drawn for that particular result. + Boolean array indicating whether more samples are to be drawn for that particular result. """ return ((means >= desired_confidence) & (lbs < desired_confidence - epsilon_stop)) | \ @@ -618,20 +614,20 @@ def anchor_beam(self, delta: float = 0.05, epsilon: float = 0.1, desired_confide """ Uses the KL-LUCB algorithm (Kaufmann and Kalyanakrishnan, 2013) together with additional sampling to search feature sets (anchors) that guarantee the prediction made by a classifier model. The search is greedy if - beam_size=1. Otherwise, at each of the max_anchor_size steps, beam_size solutions are explored. By construction, - solutions found have high precision (defined as the expected of number of times the classifier makes the same - prediction when queried with the feature subset combined with arbitrary samples drawn from a noise distribution) - The algorithm maximises the coverage of the solution found - the frequency of occurrence of records containing - the feature subset in set of samples. + ``beam_size=1``. Otherwise, at each of the `max_anchor_size` steps, `beam_size` solutions are explored. 
+ By construction, solutions found have high precision (defined as the expected of number of times the classifier + makes the same prediction when queried with the feature subset combined with arbitrary samples drawn from a + noise distribution). The algorithm maximises the coverage of the solution found - the frequency of occurrence + of records containing the feature subset in set of samples. Parameters ---------- delta - Used to compute beta. + Used to compute `beta`. epsilon Precision bound tolerance for convergence. desired_confidence - Desired level of precision (tau in paper). + Desired level of precision (`tau` in `paper `_). beam_size Beam width. epsilon_stop @@ -653,8 +649,7 @@ def anchor_beam(self, delta: float = 0.05, epsilon: float = 0.1, desired_confide Returns ------- - Explanation dictionary containing anchors with metadata like coverage and precision - and examples. + Explanation dictionary containing anchors with metadata like coverage and precision and examples. """ # Select coverage set and initialise object state @@ -823,7 +818,7 @@ def anchor_beam(self, delta: float = 0.05, epsilon: float = 0.1, desired_confide class DistributedAnchorBaseBeam(AnchorBaseBeam): if RAY_INSTALLED: import ray - ray = ray + ray = ray #: `ray` module. def __init__(self, samplers: List[Callable], **kwargs) -> None: @@ -843,11 +838,12 @@ def _get_coverage_samples(self, coverage_samples: int, # type: ignore[override] Parameters ---------- - See superclass implementation. + coverage_samples, samplers + See :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam._get_coverage_samples` implementation. Returns ------- - See superclass implementation. + See :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam._get_coverage_samples` implementation. """ [coverage_data] = DistributedAnchorBaseBeam.ray.get( @@ -862,11 +858,12 @@ def draw_samples(self, anchors: list, batch_size: int) -> Tuple[np.ndarray, np.n Parameters ---------- - See superclass implementation. + anchors, batch_size + See :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam.draw_samples` implementation. Returns ------- - Same outputs as superclass but of different types. + See :py:meth:`alibi.explainers.anchor_base.AnchorBaseBeam.draw_samples` implementation. """ # partial anchors not generated by propose_anchors are not in the order dictionary diff --git a/alibi/explainers/anchor_explanation.py b/alibi/explainers/anchor_explanation.py index 971547354..de272557e 100644 --- a/alibi/explainers/anchor_explanation.py +++ b/alibi/explainers/anchor_explanation.py @@ -12,9 +12,9 @@ def __init__(self, exp_type: str, exp_map: dict) -> None: Parameters ---------- exp_type - Type of explainer: tabular, text or image + Type of explainer: tabular, text or image. exp_map - Dictionary with the anchors and explainer metadata for an observation + Dictionary with the anchors and explainer metadata for an observation. """ self.type = exp_type self.exp_map = exp_map @@ -25,12 +25,13 @@ def names(self, partial_index: Optional[int] = None) -> list: ---------- partial_index Get the result until a certain index. - For example, if the result is (A=1,B=2,C=2) and partial_index=1, this will return ["A=1", "B=2"]. + For example, if the result is ``(A=1, B=2, C=2)`` and ``partial_index=1``, this will + return ``["A=1", "B=2"]``. Returns ------- names - Names with the result conditions + Names with the result conditions. 
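A quick illustration of the `partial_index` behaviour documented for `names` above; the toy `exp_map` holds only the key this accessor reads, whereas real maps carry additional metadata:

    from alibi.explainers.anchor_explanation import AnchorExplanation

    anchor_exp = AnchorExplanation('tabular', {'names': ['A=1', 'B=2', 'C=2']})
    print(anchor_exp.names())                 # ['A=1', 'B=2', 'C=2']
    print(anchor_exp.names(partial_index=1))  # ['A=1', 'B=2']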
""" names = self.exp_map['names'] if partial_index is not None: @@ -43,7 +44,8 @@ def features(self, partial_index: Optional[int] = None) -> list: ---------- partial_index Get the result until a certain index. - For example, if the result uses segment_labels (1, 2, 3) and partial_index=1, this will return [1, 2]. + For example, if the result uses ``segment_labels=(1, 2, 3)`` and ``partial_index=1``, this will + return ``[1, 2]``. Returns ------- @@ -61,12 +63,13 @@ def precision(self, partial_index: Optional[int] = None) -> float: ---------- partial_index Get the result precision until a certain index. - For example, if the result has precisions [0.1, 0.5, 0.95] and partial_index=1, this will return 0.5. + For example, if the result has precisions ``[0.1, 0.5, 0.95]`` and ``partial_index=1``, this will + return ``0.5``. Returns ------- precision - Anchor precision + Anchor precision. """ precision = self.exp_map['precision'] if len(precision) == 0: @@ -82,12 +85,13 @@ def coverage(self, partial_index: Optional[int] = None) -> float: ---------- partial_index Get the result coverage until a certain index. - For example, if the result has precisions [0.1, 0.5, 0.95] and partial_index=1, this will return 0.5. + For example, if the result has precisions ``[0.1, 0.5, 0.95]`` and ``partial_index=1``, this will + return ``0.5``. Returns ------- coverage - Anchor coverage + Anchor coverage. """ coverage = self.exp_map['coverage'] if len(coverage) == 0: @@ -103,15 +107,17 @@ def examples(self, only_different_prediction: bool = False, Parameters ---------- only_different_prediction - If True, will only return examples where the result makes a different prediction than the original model + If ``True``, will only return examples where the result makes a different prediction than the + original model. only_same_prediction - If True, will only return examples where the result makes the same prediction than the original model + If ``True``, will only return examples where the result makes the same prediction than the + original model. partial_index - Get the examples from the partial result until a certain index + Get the examples from the partial result until a certain index. Returns ------- - Examples covered by result + Examples covered by result. """ if only_different_prediction and only_same_prediction: print('Error: you cannot have only_different_prediction and only_same_prediction at the same time') diff --git a/alibi/explainers/anchor_image.py b/alibi/explainers/anchor_image.py index 9615f0e88..c34b30d18 100644 --- a/alibi/explainers/anchor_image.py +++ b/alibi/explainers/anchor_image.py @@ -65,7 +65,7 @@ def __init__( Parameters ---------- predictor - A callable that takes a tensor of N data points as inputs and returns N outputs. + A callable that takes a `numpy` array of `N` data points as inputs and returns `N` outputs. segmentation_fn Function used to segment the images. image @@ -76,7 +76,7 @@ def __init__( Probability for a pixel to be represented by the average value of its superpixel. n_covered_ex How many examples where anchors apply to store for each anchor sampled during search - (both examples where prediction on samples agrees/disagrees with desired_label are stored). + (both examples where prediction on samples agrees/disagrees with `desired_label` are stored). 
""" self.predictor = predictor self.segmentation_fn = segmentation_fn @@ -101,28 +101,35 @@ def __call__( Parameters ---------- anchor - int: order of anchor in the batch - tuple: features (= superpixels) present in the proposed anchor + - ``int`` - order of anchor in the batch. + - ``tuple`` - features (= superpixels) present in the proposed anchor. num_samples - Number of samples used + Number of samples used. compute_labels - If True, an array of comparisons between predictions on perturbed samples and + If ``True``, an array of comparisons between predictions on perturbed samples and instance to be explained is returned. Returns ------- - If compute_labels=True, a list containing the following is returned: - - covered_true: perturbed examples where the anchor applies and the model prediction - on perturbed is the same as the instance prediction - - covered_false: perturbed examples where the anchor applies and the model prediction - on pertrurbed sample is NOT the same as the instance prediction - - labels: num_samples ints indicating whether the prediction on the perturbed sample - matches (1) the label of the instance to be explained or not (0) - - data: Matrix with 1s and 0s indicating whether the values in a superpixel will - remain unchanged (1) or will be perturbed (0), for each sample - - 1.0: indicates exact coverage is not computed for this algorithm - - anchor[0]: position of anchor in the batch request - Otherwise, a list containing the data matrix only is returned. + If ``compute_labels=True``, a list containing the following is returned + + - `covered_true` - perturbed examples where the anchor applies and the model prediction on perturbed is the \ + same as the instance prediction. + + - `covered_false` - perturbed examples where the anchor applies and the model prediction on pertrurbed sample \ + is NOT the same as the instance prediction. + + - `labels` - `num_samples` ints indicating whether the prediction on the perturbed sample matches (1) \ + the label of the instance to be explained or not (0). + + - `data` - Matrix with 1s and 0s indicating whether the values in a superpixel will remain unchanged (1) or \ + will be perturbed (0), for each sample. + + - `1.0` - indicates exact coverage is not computed for this algorithm. + + - `anchor[0]` - position of anchor in the batch request + + Otherwise, a list containing the data matrix only is returned. """ if compute_labels: @@ -154,7 +161,7 @@ def compare_labels(self, samples: np.ndarray) -> np.ndarray: Returns ------- - A boolean array indicating whether the prediction was the same as the instance label. + A boolean array indicating whether the prediction was the same as the instance label. """ return self.predictor(samples) == self.instance_label @@ -207,9 +214,9 @@ def perturbation( Returns ------- imgs - A [num_samples, H, W, C] array of perturbed images. + A `[num_samples, H, W, C]` array of perturbed images. segments_mask - A [num_samples, M] binary mask, where M is the number of image superpixels + A `[num_samples, M]` binary mask, where `M` is the number of image superpixels segments. 1 indicates the values in that particular superpixels are not perturbed. """ @@ -269,7 +276,7 @@ def generate_superpixels(self, image: np.ndarray) -> np.ndarray: Returns ------- - A [H, W] array of integers. Each integer is a segment (superpixel) label. + A `[H, W]` array of integers. Each integer is a segment (superpixel) label. 
""" image_preproc = self._preprocess_img(image) @@ -287,7 +294,7 @@ def _preprocess_img(self, image: np.ndarray) -> np.ndarray: Returns ------- - A preprocessed image. + A preprocessed image. """ # Grayscale images are repeated across channels @@ -314,17 +321,17 @@ def __init__(self, Parameters ---------- predictor - A callable that takes a tensor of N data points as inputs and returns N outputs. + A callable that takes a `numpy` array of `N` data points as inputs and returns `N` outputs. image_shape - Shape of the image to be explained. + Shape of the image to be explained. The channel axis is expected to be last. dtype - A numpy scalar type that corresponds to the type of input array expected by `predictor`. This may be + A `numpy` scalar type that corresponds to the type of input array expected by `predictor`. This may be used to construct arrays of the given type to be passed through the `predictor`. For most use cases this argument should have no effect, but it is exposed for use with predictors that would break when called with an array of unsupported type. segmentation_fn - Any of the built in segmentation function strings: 'felzenszwalb', 'slic' or 'quickshift' or a custom - segmentation function (callable) which returns an image mask with labels for each superpixel. + Any of the built in segmentation function strings: ``'felzenszwalb'``, ``'slic'`` or ``'quickshift'`` or + a custom segmentation function (callable) which returns an image mask with labels for each superpixel. See http://scikit-image.org/docs/dev/api/skimage.segmentation.html for more info. segmentation_kwargs Keyword arguments for the built in segmentation functions. @@ -407,7 +414,7 @@ def generate_superpixels(self, image: np.ndarray) -> np.ndarray: Returns ------- - A [H, W] array of integers. Each integer is a segment (superpixel) label. + A `[H, W]` array of integers. Each integer is a segment (superpixel) label. """ image_preproc = self._preprocess_img(image) @@ -425,7 +432,7 @@ def _preprocess_img(self, image: np.ndarray) -> np.ndarray: Returns ------- - A preprocessed image. + A preprocessed image. """ # Grayscale images are repeated across channels @@ -466,7 +473,7 @@ def explain(self, # type: ignore[override] threshold Minimum precision threshold. delta - Used to compute beta. + Used to compute `beta`. tau Margin between lower confidence bound and minimum precision of upper bound. batch_size @@ -476,7 +483,7 @@ def explain(self, # type: ignore[override] beam_size The number of anchors extended at each step of new anchors construction. stop_on_first - If True, the beam search algorithm will return the first anchor that has satisfies the + If ``True``, the beam search algorithm will return the first anchor that has satisfies the probability constraint. max_anchor_size Maximum number of features in result. @@ -484,12 +491,12 @@ def explain(self, # type: ignore[override] Min number of initial samples. n_covered_ex How many examples where anchors apply to store for each anchor sampled during search - (both examples where prediction on samples agrees/disagrees with desired_label are stored). + (both examples where prediction on samples agrees/disagrees with `desired_label` are stored). binary_cache_size - The result search pre-allocates binary_cache_size batches for storing the binary arrays + The result search pre-allocates `binary_cache_size` batches for storing the binary arrays returned during sampling. 
cache_margin - When only max(cache_margin, batch_size) positions in the binary cache remain empty, a new cache + When only ``max(cache_margin, batch_size)`` positions in the binary cache remain empty, a new cache of the same size is pre-allocated to continue buffering samples. verbose Display updates during the anchor search iterations. @@ -500,6 +507,10 @@ def explain(self, # type: ignore[override] ------- explanation `Explanation` object containing the anchor explaining the instance with additional metadata as attributes. + See usage at `AnchorImage examples`_ for details. + + .. _AnchorImage examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/Anchors.html """ # get params for storage in meta params = locals() @@ -538,11 +549,11 @@ def explain(self, # type: ignore[override] **kwargs, ) # type: Any - return self.build_explanation( + return self._build_explanation( image, result, sampler.instance_label, params, sampler ) - def build_explanation( + def _build_explanation( self, image: np.ndarray, result: dict, @@ -563,7 +574,7 @@ def build_explanation( predicted_label Label of the instance to be explained. params - Parameters passed to `explain` + Parameters passed to `:py:meth:alibi.explainers.anchor_image.AnchorImage.explain`. """ result['instance'] = image @@ -601,7 +612,7 @@ def overlay_mask(self, image: np.ndarray, segments: np.ndarray, mask_features: l image Image to be explained. segments - Superpixels + Superpixels. mask_features List with superpixels present in mask. scale @@ -643,4 +654,12 @@ def _transform_predictor(self, predictor: Callable) -> Callable: return transformer def reset_predictor(self, predictor: Callable) -> None: + """ + Resets the predictor function. + + Parameters + ---------- + predictor + New predictor function. + """ self.predictor = self._transform_predictor(predictor) diff --git a/alibi/explainers/anchor_tabular.py b/alibi/explainers/anchor_tabular.py index 1b52158c0..3946d5b42 100644 --- a/alibi/explainers/anchor_tabular.py +++ b/alibi/explainers/anchor_tabular.py @@ -22,9 +22,11 @@ class TabularSampler: """ A sampler that uses an underlying training set to draw records that have a subset of features with - values specified in an instance to be explained, X.""" + values specified in an instance to be explained, `X`. """ - instance_label: int + # if documented in the Attributes, it will be documented twice. + # Probably related to: https://github.com/sphinx-doc/sphinx/issues/7427 + instance_label: int #: The label of the instance to be explained. def __init__(self, predictor: Callable, disc_perc: Tuple[Union[int, float], ...], numerical_features: List[int], categorical_features: List[int], feature_names: list, feature_values: dict, n_covered_ex: int = 10, @@ -33,9 +35,9 @@ def __init__(self, predictor: Callable, disc_perc: Tuple[Union[int, float], ...] Parameters ---------- predictor - A callable that takes a tensor of N data points as inputs and returns N outputs. + A callable that takes a tensor of `N` data points as inputs and returns `N` outputs. disc_perc - Percentiles used for numerical feat. discretisation. + Percentiles used for numerical feature discretisation. numerical_features Numerical features column IDs. categorical_features @@ -69,20 +71,19 @@ def __init__(self, predictor: Callable, disc_perc: Tuple[Union[int, float], ...] 
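For orientation, the `AnchorImage` interface documented above is typically driven end to end along these lines; the toy image and thresholding "classifier" below stand in for a real model and are only illustrative:

    import numpy as np
    from alibi.explainers import AnchorImage

    image = np.random.rand(64, 64, 3).astype(np.float32)
    predict_fn = lambda x: (x.mean(axis=(1, 2, 3)) > 0.5).astype(int)  # toy binary "classifier"

    explainer = AnchorImage(predict_fn, image_shape=(64, 64, 3), segmentation_fn='slic',
                            segmentation_kwargs={'n_segments': 15, 'compactness': 20, 'sigma': 0.5})
    explanation = explainer.explain(image, threshold=0.95, p_sample=0.5)
    print(explanation.precision, explanation.coverage)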
def deferred_init(self, train_data: Union[np.ndarray, Any], d_train_data: Union[np.ndarray, Any]) -> Any: """ - Initialise the Tabular sampler object with data, discretizer, feature statistics and + Initialise the tabular sampler object with data, discretizer, feature statistics and build an index from feature values and bins to database rows for each feature. Parameters ---------- train_data: - Data from which samples are drawn. Can be a numpy array or a ray future. + Data from which samples are drawn. Can be a `numpy` array or a `ray` future. d_train_data: - Discretized version for training data. Can be a numpy array or a ray future. + Discretized version for training data. Can be a `numpy` array or a `ray` future. Returns ------- - An initialised sampler. - + An initialised sampler. """ self._set_data(train_data, d_train_data) @@ -115,7 +116,7 @@ def _set_discretizer(self, disc_perc: Tuple[Union[int, float], ...]) -> None: def _set_numerical_feats_stats(self) -> None: """ - Compute min and max for numerical features so that sampling from this range can be performed if + Compute `min` and `max` for numerical features so that sampling from this range can be performed if a sampling request has bin that is not in the training data. """ @@ -130,7 +131,7 @@ def set_instance_label(self, X: np.ndarray) -> None: Parameters ---------- X - Instance to be explained. + Instance to be explained. """ label = self.predictor(X.reshape(1, -1))[0] # type: int @@ -184,22 +185,29 @@ def __call__(self, anchor: Tuple[int, tuple], num_samples: int, compute_labels=T num_samples Number of samples used when sampling from training set. compute_labels - If True, an array of comparisons between predictions on perturbed samples and instance to be + If ``True``, an array of comparisons between predictions on perturbed samples and instance to be explained is returned. Returns ------- - If compute_labels=True, a list containing the following is returned: - - covered_true: perturbed examples where the anchor applies and the model prediction - on perturbation is the same as the instance prediction - - covered_false: perturbed examples where the anchor applies and the model prediction - is NOT the same as the instance prediction - - labels: num_samples ints indicating whether the prediction on the perturbed sample - matches (1) the label of the instance to be explained or not (0) - - data: Sampled data where ordinal features are binned (1 if in bin, 0 otherwise) - - coverage: the coverage of the anchor - - anchor[0]: position of anchor in the batch request - Otherwise, a list containing the data matrix only is returned. + If ``compute_labels=True``, a list containing the following is returned + + - `covered_true` - perturbed examples where the anchor applies and the model prediction \ + on perturbation is the same as the instance prediction. + + - `covered_false` - perturbed examples where the anchor applies and the model prediction \ + is NOT the same as the instance prediction. + + - `labels` - `num_samples` ints indicating whether the prediction on the perturbed sample \ + matches (1) the label of the instance to be explained or not (0). + + - `data` - Sampled data where ordinal features are binned (1 if in bin, 0 otherwise). + + - `coverage` - the coverage of the anchor. + + - `anchor[0]` - position of anchor in the batch request. + + Otherwise, a list containing the data matrix only is returned. 
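To tie together the `AnchorImage` constructor and `explain` arguments documented earlier in this patch, a minimal usage sketch follows. The predictor and the 28x28 grayscale input are placeholders; only the argument names mirror the docstrings above.

    import numpy as np
    from alibi.explainers import AnchorImage

    # Placeholder predictor: scores a batch of images by mean pixel intensity.
    def predictor(images: np.ndarray) -> np.ndarray:
        means = images.reshape(len(images), -1).mean(axis=1)
        return np.stack([1 - means, means], axis=1)

    explainer = AnchorImage(
        predictor,
        image_shape=(28, 28, 1),        # channel axis last, as documented
        segmentation_fn='slic',         # one of the built-in segmentation strings
        segmentation_kwargs={'n_segments': 10, 'compactness': 20},
    )

    image = np.random.rand(28, 28, 1).astype(np.float32)
    explanation = explainer.explain(image, threshold=0.95, batch_size=100)
    print(explanation.precision, explanation.coverage)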
""" raw_data, d_raw_data, coverage = self.perturbation(anchor[1], num_samples) @@ -237,14 +245,14 @@ def compare_labels(self, samples: np.ndarray) -> np.ndarray: Returns ------- - An array of integers indicating whether the prediction was the same as the instance label. + An array of integers indicating whether the prediction was the same as the instance label. """ return self.predictor(samples) == self.instance_label def perturbation(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndarray, float]: """ - Implements functionality described in __call__. + Implements functionality described in :py:meth:`alibi.explainers.anchor_tabular.TabularSampler.__call__`. Parameters ---------- @@ -255,11 +263,10 @@ def perturbation(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np. Returns ------- - samples Sampled data from training set. d_samples - Like samples, but continuous data is converted to oridinal discrete data (binned). + Like samples, but continuous data is converted to ordinal discrete data (binned). coverage The coverage of the result in the training data. """ @@ -324,13 +331,13 @@ def handle_unk_features(self, allowed_bins: Dict[int, Set[int]], num_samples: in Parameters ---------- - allowed_bins: - See get_feature_index method. - num_samples: + allowed_bins + See :py:meth:`alibi.explainers.anchor_tabular.TabularSampler.get_feature_index` method. + num_samples Number of replacement values. - samples: + samples Contains the samples whose values are to be replaced. - unk_feature_values: + unk_feature_values List of tuples where: [0] is original feature id, [1] feature type, [2] if var is categorical, replacement value, otherwise None """ @@ -370,7 +377,7 @@ def replace_features(self, samples: np.ndarray, allowed_rows: Dict[int, Any], un the entire anchor applies. nb_partial_anchors The number of training records which contain each partial anchor. - num_samples: + num_samples Number of perturbed samples to be returned. """ @@ -436,7 +443,7 @@ def get_features_index(self, anchor: tuple) -> \ the same value as the feature in the instance to be explained (for ordinal variables, the row indices are those of rows which contain records with feature values in the same bin). The algorithm uses both the feature *encoded* ids in anchor and the feature ids in the input data set. The two - are mapped by self.enc2feat_idx. + are mapped by `self.enc2feat_idx`. Parameters ---------- @@ -452,8 +459,8 @@ def get_features_index(self, anchor: tuple) -> \ Maps original feature ids to the training set rows where these features have the same value as the anchor. unk_feat_values When a categorical variable with the specified value/discretized variable in the specified bin is not found - in the training set, a tuple is added to unk_feat_values to indicate the original feature id, its type - ('c'=categorical, o='discretized continuous') and the value/bin it should be sampled from. + in the training set, a tuple is added to `unk_feat_values` to indicate the original feature id, its type + (``'c'`` = categorical, ``'o'`` = discretized continuous) and the value/bin it should be sampled from. """ # bins one can sample from for each numerical feature (key: feat id) @@ -494,26 +501,31 @@ def get_features_index(self, anchor: tuple) -> \ def build_lookups(self, X: np.ndarray) -> List[Dict]: """ An encoding of the feature IDs is created by assigning each bin of a discretized numerical variable and each - categorical variable a unique index. 
For a dataset containg, e.g., a numerical variable with 5 bins and + categorical variable a unique index. For a dataset containing, e.g., a numerical variable with 5 bins and 3 categorical variables, indices 0 - 4 represent bins of the numerical variable whereas indices 5, 6, 7 represent the encoded indices of the categorical variables (but see note for caviats). The encoding is necessary so that the different ranges of the numerical variable can be sampled during result construction. Note that the encoded indices represent the predicates used during the anchor construction process (i.e., and anchor is a collection of encoded indices. - Note: Each continuous variable has n_bins - 1 corresponding entries in ord_lookup. - Parameters --------- X - instance to be explained + Instance to be explained. Returns ------- - a list containing three dictionaries, whose keys are encoded feature IDs: - - cat_lookup: maps categorical variables to their value in X - - ord_lookup: maps discretized numerical variables to the bins they can be sampled from given X - - enc2feat_idx: maps the encoded IDs to the original (training set) feature column IDs + A list containing three dictionaries, whose keys are encoded feature IDs + + - `cat_lookup` - maps categorical variables to their value in `X`. + + - `ord_lookup` - maps discretized numerical variables to the bins they can be sampled from given `X`. + + - `enc2feat_idx` - maps the encoded IDs to the original (training set) feature column IDs. + + Notes + ----- + Each continuous variable has `n_bins - 1` corresponding entries in `ord_lookup`. """ X = self.disc.discretize(X.reshape(1, -1))[0] # map continuous features to ordinal discrete variables @@ -569,10 +581,11 @@ def build_lookups(self, X: np.ndarray) -> List[Dict]: class RemoteSampler: - """ A wrapper that facilitates the use of TabularSampler for distributed sampling.""" + """ A wrapper that facilitates the use of `TabularSampler` for distributed sampling.""" if RAY_INSTALLED: import ray - ray = ray # set module as class variable to used only in this context + # set module as class variable to used only in this context + ray = ray #: `ray` module. def __init__(self, *args): self.train_id, self.d_train_id, self.sampler = args @@ -581,17 +594,14 @@ def __init__(self, *args): def __call__(self, anchors_batch: Union[Tuple[int, tuple], List[Tuple[int, tuple]]], num_samples: int, compute_labels: bool = True) -> List: """ - Wrapper around TabularSampler.__call__. It allows sampling a batch of anchors in the same process, - which can improve performance. + Wrapper around :py:meth:`alibi.explainers.anchor_tabular.TabularSampler.__call__`. It allows sampling a batch + of anchors in the same process, which can improve performance. Parameters ---------- - anchors_batch: - A list of result tuples. see TabularSampler.__call__ for details. - num_samples: - See TabularSampler.__call__. - compute_labels - See TabularSampler.__call__. + anchors_batch, num_samples, compute_labels + A list of result tuples. See :py:meth:`alibi.explainers.anchor_tabular.TabularSampler.__call__` + for details. """ if isinstance(anchors_batch, tuple): # DistributedAnchorBaseBeam._get_samples_coverage call @@ -643,22 +653,22 @@ def _get_sampler(self) -> TabularSampler: Returns ------- - The tabular sampler object that is used in the process. + The tabular sampler object that is used in the process. """ return self.sampler - def build_lookups(self, X): + def build_lookups(self, X: np.ndarray): """ - Wrapper around TabularSampler.build_lookups. 
+ Wrapper around :py:meth:`alibi.explainers.anchor_tabular.TabularSampler.build_lookups`. Parameters -------- X - See TabularSampler.build_lookups. + See :py:meth:`alibi.explainers.anchor_tabular.TabularSampler.build_lookups`. Returns ------- - See TabularSampler.build_lookups. + See :py:meth:`alibi.explainers.anchor_tabular.TabularSampler.build_lookups`. """ cat_lookup_id, ord_lookup_id, enc2feat_idx_id = self.sampler.build_lookups(X) @@ -667,7 +677,7 @@ def build_lookups(self, X): class AnchorTabular(Explainer, FitMixin): - instance_label: int + instance_label: int #: The label of the instance to be explained. def __init__(self, predictor: Callable[[np.ndarray], np.ndarray], @@ -680,13 +690,13 @@ def __init__(self, Parameters ---------- predictor - A callable that takes a tensor of N data points as inputs and returns N outputs. + A callable that takes a `numpy` array of `N` data points as inputs and returns `N` outputs. feature_names List with feature names. categorical_names Dictionary where keys are feature columns and values are the categories for the feature. dtype - A numpy scalar type that corresponds to the type of input array expected by `predictor`. This may be + A `numpy` scalar type that corresponds to the type of input array expected by `predictor`. This may be used to construct arrays of the given type to be passed through the `predictor`. For most use cases this argument should have no effect, but it is exposed for use with predictors that would break when called with an array of unsupported type. @@ -749,7 +759,7 @@ def fit(self, # type: ignore[override] train_data Representative sample from the training data. disc_perc - List with percentiles (int) used for discretization. + List with percentiles (`int`) used for discretization. """ # transform one-hot encodings to labels if ohe == True @@ -779,11 +789,11 @@ def fit(self, # type: ignore[override] def _build_sampling_lookups(self, X: np.ndarray) -> None: """ Build a series of lookup tables used to draw samples with feature subsets identical to - given subsets of X (see TabularSampler.build_sampling_lookups for details). + given subsets of `X` (see TabularSampler.build_sampling_lookups for details). Parameters ---------- - X: + X Instance to be explained. """ @@ -808,7 +818,7 @@ def explain(self, verbose_every: int = 1, **kwargs: Any) -> Explanation: """ - Explain prediction made by classifier on instance X. + Explain prediction made by classifier on instance `X`. Parameters ---------- @@ -817,7 +827,7 @@ def explain(self, threshold Minimum precision threshold. delta - Used to compute beta. + Used to compute `beta`. tau Margin between lower confidence bound and minimum precision or upper bound. batch_size @@ -827,7 +837,7 @@ def explain(self, beam_size The number of anchors extended at each step of new anchors construction. stop_on_first - If True, the beam search algorithm will return the first anchor that has satisfies the + If ``True``, the beam search algorithm will return the first anchor that has satisfies the probability constraint. max_anchor_size Maximum number of features in result. @@ -835,23 +845,26 @@ def explain(self, Min number of initial samples. n_covered_ex How many examples where anchors apply to store for each anchor sampled during search - (both examples where prediction on samples agrees/disagrees with desired_label are stored). + (both examples where prediction on samples agrees/disagrees with `desired_label` are stored). 
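The `AnchorTabular` constructor, `fit` and `explain` arguments documented in this file can be combined as in the sketch below; the synthetic data and the scikit-learn classifier are stand-ins, while the keyword arguments follow the docstrings above.

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from alibi.explainers import AnchorTabular

    # Synthetic data: two numerical columns and one categorical column (index 2).
    rng = np.random.default_rng(0)
    X_train = np.column_stack([
        rng.normal(size=500),
        rng.normal(size=500),
        rng.integers(0, 3, size=500),
    ]).astype(np.float32)
    y_train = (X_train[:, 0] + X_train[:, 2] > 1).astype(int)
    clf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X_train, y_train)

    explainer = AnchorTabular(
        predictor=clf.predict,
        feature_names=['f0', 'f1', 'cat'],
        categorical_names={2: ['a', 'b', 'c']},  # column index -> category names
    )
    explainer.fit(X_train, disc_perc=(25, 50, 75))  # percentiles for discretisation

    explanation = explainer.explain(X_train[0], threshold=0.95)
    print(explanation.anchor, explanation.precision, explanation.coverage)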
binary_cache_size - The result search pre-allocates binary_cache_size batches for storing the binary arrays + The result search pre-allocates `binary_cache_size` batches for storing the binary arrays returned during sampling. cache_margin - When only max(cache_margin, batch_size) positions in the binary cache remain empty, a new cache + When only ``max(cache_margin, batch_size)`` positions in the binary cache remain empty, a new cache of the same size is pre-allocated to continue buffering samples. verbose Display updates during the anchor search iterations. verbose_every Frequency of displayed iterations during anchor search process. - Returns ------- explanation `Explanation` object containing the result explaining the instance with additional metadata as attributes. + See usage at `AnchorTabular examples`_ for details. + + .. _AnchorTabular examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/Anchors.html """ # transform one-hot encodings to labels if ohe == True X = ohe_to_ord(X_ohe=X.reshape(1, -1), cat_vars_ohe=self.cat_vars_ohe)[0].reshape(-1) if self.ohe else X @@ -890,27 +903,27 @@ def explain(self, ) # type: Any self.mab = mab - return self.build_explanation(X, result, self.instance_label, params) + return self._build_explanation(X, result, self.instance_label, params) - def build_explanation(self, X: np.ndarray, result: dict, predicted_label: int, params: dict) -> Explanation: + def _build_explanation(self, X: np.ndarray, result: dict, predicted_label: int, params: dict) -> Explanation: """ Preprocess search output and return an explanation object containing metdata Parameters ---------- - X: + X Instance to be explained. - result: + result Dictionary with explanation search output and metadata. - predicted_label: + predicted_label Label of the instance to be explained (inferred if not given). params - Parameters passed to `explain` + Parameters passed to :py:meth:`alibi.explainers.anchor_tabular.AnchorTabular.explain`. Return ------ - `Explanation` object containing human readable explanation, metadata, and precision/coverage - info as attributes. + `Explanation` object containing the anchor explaining the instance with additional metadata as attributes. \ + """ self.add_names_to_exp(result) @@ -1064,6 +1077,14 @@ def _transform_ohe_predictor(self, predictor: Callable) -> Callable: return predictor def reset_predictor(self, predictor: Callable) -> None: + """ + Resets the predictor function. + + Parameters + ---------- + predictor + New predictor function. + """ self.predictor = predictor self.samplers[0].predictor = self._predictor @@ -1071,7 +1092,8 @@ def reset_predictor(self, predictor: Callable) -> None: class DistributedAnchorTabular(AnchorTabular): if RAY_INSTALLED: import ray - ray = ray # set module as class variable to used only in this context + # set module as class variable to used only in this context + ray = ray #: `ray` module. def __init__(self, predictor: Callable, @@ -1095,7 +1117,8 @@ def fit(self, # type: ignore[override] Parameters ---------- - See superclass implementation. + train_data, disc_perc, **kwargs + See :py:meth:`alibi.explainers.anchor_tabular.AnchorTabular.fit` superclass. """ try: @@ -1140,12 +1163,12 @@ def fit(self, # type: ignore[override] def _build_sampling_lookups(self, X: np.ndarray) -> None: """ - See superclass documentation. + See :py:meth:`alibi.explainers.anchor_tabular.AnchorTabular._build_sampling_lookups` documentation. Parameters ---------- - X: - See superclass documentation. 
+ X + See :py:meth:`alibi.explainers.anchor_tabular.AnchorTabular._build_sampling_lookups` documentation. """ lookups = [sampler.build_lookups.remote(X) for sampler in self.samplers][0] @@ -1169,16 +1192,18 @@ def explain(self, verbose_every: int = 1, **kwargs: Any) -> Explanation: """ - Explains the prediction made by a classifier on instance X. Sampling is done in parallel over a number of - cores specified in kwargs['ncpu']. + Explains the prediction made by a classifier on instance `X`. Sampling is done in parallel over a number of + cores specified in `kwargs['ncpu']`. Parameters ---------- - See superclass implementation. + X, threshold, delta, tau, batch_size, coverage_samples, beam_size, stop_on_first, max_anchor_size, \ + min_samples_start, n_covered_ex, binary_cache_size, cache_margin, verbose, verbose_every, **kwargs + See :py:meth:`alibi.explainers.anchor_tabular.AnchorTabular.explain`. Returns ------- - See superclass implementation. + See :py:meth:`alibi.explainers.anchor_tabular.AnchorTabular.explain` superclass. """ # transform one-hot encodings to labels if ohe == True X = ohe_to_ord(X_ohe=X.reshape(1, -1), cat_vars_ohe=self.cat_vars_ohe)[0].reshape(-1) if self.ohe else X @@ -1217,9 +1242,17 @@ def explain(self, ) # type: Any self.mab = mab - return self.build_explanation(X, result, self.instance_label, params) + return self._build_explanation(X, result, self.instance_label, params) def reset_predictor(self, predictor: Callable) -> None: + """ + Resets the predictor function. + + Parameters + ---------- + predictor + New model prediction function. + """ raise NotImplementedError("Resetting predictor is currently not supported for distributed explainers.") # TODO: to support resetting a predictor we would need to re-run most of the code in `fit` instantiating the # instances of RemoteSampler anew diff --git a/alibi/explainers/anchor_text.py b/alibi/explainers/anchor_text.py index 3d61ba940..78b80b50b 100644 --- a/alibi/explainers/anchor_text.py +++ b/alibi/explainers/anchor_text.py @@ -62,8 +62,7 @@ def _load_spacy_lexeme_prob(nlp: 'spacy.language.Language') -> 'spacy.language.L return nlp -class Neighbors(object): - +class Neighbors: def __init__(self, nlp_obj: 'spacy.language.Language', n_similar: int = 500, w_prob: float = -15.) -> None: """ Initialize class identifying neighbouring words from the embedding for a given word. @@ -71,7 +70,7 @@ def __init__(self, nlp_obj: 'spacy.language.Language', n_similar: int = 500, w_p Parameters ---------- nlp_obj - spaCy model. + `spaCy` model. n_similar Number of similar words to return. w_prob @@ -98,13 +97,12 @@ def neighbors(self, word: str, tag: str, top_n: int) -> dict: tag Part of speech tag for the words. top_n - Return only top_n neighbors. + Return only `top_n` neighbors. Returns ------- - A dict with two fields. The 'words' field contains a numpy array - of the top_n most similar words, whereas the fields similarity is - a numpy array with corresponding word similarities. + A dict with two fields. The ``'words'`` field contains a `numpy` array of the `top_n` most similar words, \ + whereas the fields ``'similarities'`` is a `numpy` array with corresponding word similarities. 
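The `Neighbors` helper documented above can be exercised on its own. The sketch assumes a spaCy model with word vectors (here `en_core_web_md`) is installed; the query word and tag are illustrative, and an empty result simply means no neighbour passed the probability and tag filters.

    import spacy
    from alibi.explainers.anchor_text import Neighbors, _load_spacy_lexeme_prob

    # A model with word vectors is assumed; the lexeme probability table is attached first.
    nlp = _load_spacy_lexeme_prob(spacy.load('en_core_web_md'))
    neighbors = Neighbors(nlp, n_similar=500, w_prob=-15.)

    # Up to 5 words similar to 'good' sharing its part-of-speech tag (tag value is an assumption).
    result = neighbors.neighbors('good', tag='ADJ', top_n=5)
    print(result['words'], result['similarities'])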
""" # the word itself is excluded so we add one to return the expected number of words @@ -144,18 +142,18 @@ def __call__(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndar def _joiner(self, arr: np.ndarray, dtype: Optional[Type[np.generic]] = None) -> np.ndarray: """ - Function to concatenate an np.array of strings along a specified axis. + Function to concatenate a `numpy` array of strings along a specified axis. Parameters ---------- arr - 1D numpy array of strings. + 1D `numpy` array of strings. dtype Array type, used to avoid truncation of strings when concatenating along axis. Returns ------- - Array with one element, the concatenation of the strings in the input array. + Array with one element, the concatenation of the strings in the input array. """ if not dtype: return np.array(' '.join(arr)) @@ -164,7 +162,7 @@ def _joiner(self, arr: np.ndarray, dtype: Optional[Type[np.generic]] = None) -> class UnknownSampler(AnchorTextSampler): - UNK = "UNK" + UNK: str = "UNK" #: Unknown token to be used. def __init__(self, nlp: 'spacy.language.Language', perturb_opts: Dict): """ @@ -173,7 +171,7 @@ def __init__(self, nlp: 'spacy.language.Language', perturb_opts: Dict): Parameters ---------- nlp - spaCy object. + `spaCy` object. perturb_opts Perturbation options. """ @@ -206,8 +204,8 @@ def set_text(self, text: str) -> None: def __call__(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndarray]: """ - The function returns an np.array of num_samples where randomly chosen features, - except those in anchor, are replaced by self.UNK token. + The function returns a `numpy` array of `num_samples` where randomly chosen features, + except those in anchor, are replaced by ``'UNK'`` token. Parameters ---------- @@ -221,7 +219,7 @@ def __call__(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndar raw Array containing num_samples elements. Each element is a perturbed sentence. data - A (num_samples, m)-dimensional boolean array, where m is the number of tokens + A `(num_samples, m)`-dimensional boolean array, where `m` is the number of tokens in the instance to be explained. """ assert self.perturb_opts, "Perturbation options are not set." @@ -250,7 +248,7 @@ def __call__(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndar def set_data_type(self) -> None: """ - Working with numpy arrays of strings requires setting the data type to avoid + Working with `numpy` arrays of strings requires setting the data type to avoid truncating examples. This function estimates the longest sentence expected during the sampling process, which is used to set the number of characters for the samples and examples arrays. This depends on the perturbation method @@ -270,7 +268,7 @@ def __init__(self, nlp: 'spacy.language.Language', perturb_opts: Dict): Parameters ---------- nlp - spaCy object. + `spaCy` object. perturb_opts Perturbation options. @@ -298,7 +296,7 @@ def set_text(self, text: str) -> None: Parameters ---------- text - Text to be processed. + Text to be processed. """ processed = self.nlp(text) # spaCy tokens for text self.words = [x.text for x in processed] # list with words in text @@ -314,10 +312,10 @@ def set_text(self, text: str) -> None: def find_similar_words(self) -> None: """ - This function queries a spaCy nlp model to find n similar words with the same + This function queries a `spaCy` nlp model to find `n` similar words with the same part of speech for each word in the instance to be explained. 
For each word - the search procedure returns a dictionary containing an np.array of words ('words') - and an np.array of word similarities ('similarities'). + the search procedure returns a dictionary containing a `numpy` array of words (``'words'``) + and a `numpy` array of word similarities (``'similarities'``). """ for word, token in zip(self.words, self.tokens): if word not in self.synonyms: @@ -325,9 +323,10 @@ def find_similar_words(self) -> None: def __call__(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndarray]: """ - The function returns an np.array of num_samples where randomly chosen features, - except those in anchor, are replaced by similar words with the same part of - speech of tag. See self.perturb_sentence for details of how the replacement works. + The function returns a `numpy` array of `num_samples` where randomly chosen features, + except those in anchor, are replaced by similar words with the same part of speech of tag. + See :py:meth:`alibi.explainers.anchor_text.SimilaritySampler.perturb_sentence_similarity` for details of how + the replacement works. Parameters ---------- @@ -338,7 +337,7 @@ def __call__(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndar Returns ------- - See perturb_sentence_similarity + See :py:meth:`alibi.explainers.anchor_text.SimilaritySampler.perturb_sentence`. """ assert self.perturb_opts, "Perturbation options are not set." return self.perturb_sentence_similarity(anchor, num_samples, **self.perturb_opts) @@ -364,7 +363,7 @@ def perturb_sentence_similarity(self, n Number of samples used when sampling from the corpus. sample_proba - Sample probability for a word if use_proba is False. + Sample probability for a word if `use_proba=False`. forbidden Forbidden lemmas. forbidden_tags @@ -376,15 +375,16 @@ def perturb_sentence_similarity(self, use_proba Bool whether to sample according to a similarity score with the corpus embeddings. temperature - Sample weight hyperparameter if use_proba equals True. + Sample weight hyper-parameter if ``use_proba=True``. + **kwargs + Other arguments. Not used. Returns ------- raw Array of perturbed text instances. data - Matrix with 1s and 0s indicating whether a word in the text - has not been perturbed for each sample. + Matrix with 1s and 0s indicating whether a word in the text has not been perturbed for each sample. """ # allocate memory for the binary mask and the perturbed instances raw = np.zeros((n, len(self.tokens)), self.dtype) @@ -426,7 +426,7 @@ def perturb_sentence_similarity(self, def set_data_type(self) -> None: """ - Working with numpy arrays of strings requires setting the data type to avoid + Working with `numpy` arrays of strings requires setting the data type to avoid truncating examples. This function estimates the longest sentence expected during the sampling process, which is used to set the number of characters for the samples and examples arrays. This depends on the perturbation method @@ -445,15 +445,15 @@ def set_data_type(self) -> None: class LanguageModelSampler(AnchorTextSampler): # filling procedures - FILLING_PARALLEL = 'parallel' - FILLING_AUTOREGRESSIVE = 'autoregressive' + FILLING_PARALLEL: str = 'parallel' #: Parallel filling procedure. + FILLING_AUTOREGRESSIVE = 'autoregressive' #: Autoregressive filling procedure. Considerably slow. def __init__(self, model: LanguageModel, perturb_opts: dict, ): """ Initialize language model sampler. 
This sampler replaces words with the ones sampled according to the output distribution of the language model. There are - two modes to use the sampler: `parallel` and `autoregressive`. In the parallel - mode, all words are replaced simultaneously. In the `autoregressive` model, the words + two modes to use the sampler: ``'parallel'`` and ``'autoregressive'``. In the ``'parallel'`` + mode, all words are replaced simultaneously. In the ``'autoregressive'`` model, the words are replaced one by one, starting from left to right. Thus the following words are conditioned on the previous predicted words. @@ -508,6 +508,8 @@ def get_sample_ids(self, String of punctuation characters. stopwords List of stopwords. + **kwargs + Other arguments. Not used. """ # transform stopwords to lowercase if stopwords: @@ -568,7 +570,7 @@ def set_text(self, text: str) -> None: def __call__(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndarray]: """ - The function returns an np.array of num_samples where randomly chosen features, + The function returns a `numpy` array of `num_samples` where randomly chosen features, except those in anchor, are replaced by words sampled according to the language model's predictions. @@ -581,7 +583,7 @@ def __call__(self, anchor: tuple, num_samples: int) -> Tuple[np.ndarray, np.ndar Returns ------- - See perturb_sentence + See :py:meth:`alibi.explainers.anchor_text.LanguageModelSampler.perturb_sentence`. """ assert self.perturb_opts, "Perturbation options are not set." return self.perturb_sentence(anchor, num_samples, **self.perturb_opts) @@ -595,7 +597,7 @@ def perturb_sentence(self, filling: str = "parallel", **kwargs) -> Tuple[np.ndarray, np.ndarray]: """ - The function returns an np.array of num_samples where randomly chosen features, + The function returns an `numpy` array of `num_samples` where randomly chosen features, except those in anchor, are replaced by words sampled according to the language model's predictions. @@ -612,14 +614,16 @@ def perturb_sentence(self, batch_size_lm: Batch size used for language model. filling: - Method to fill masked words. Either `parallel` or `autoregressive`. + Method to fill masked words. Either ``'parallel'`` or ``'autoregressive'``. + **kwargs + Other arguments to be passed to other methods. Returns ------- raw - Array containing num_samples elements. Each element is a perturbed sentence. + Array containing `num_samples` elements. Each element is a perturbed sentence. data - A (num_samples, m)-dimensional boolean array, where m is the number of tokens + A `(num_samples, m)`-dimensional boolean array, where `m` is the number of tokens in the instance to be explained. """ # Create the mask @@ -666,16 +670,18 @@ def create_mask(self, sample_proba Probability of a word being replaced. filling: - Method to fill masked words. Either `parallel` or `autoregressive`. + Method to fill masked words. Either ``'parallel'`` or ``'autoregressive'``. frac_mask_templates Fraction of mask templates from the number of requested samples. + **kwargs + Other arguments to be passed to other methods. Returns ------- raw Array with masked instances. data - A (num_samples, m)-dimensional boolean array, where m is the number of tokens + A `(num_samples, m)`-dimensional boolean array, where `m` is the number of tokens in the instance to be explained. 
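The mask-and-fill workflow described for `LanguageModelSampler` (mask a subset of words, then let a masked language model propose replacements) can be illustrated with the generic Hugging Face fill-mask pipeline. This is only an illustration of the idea, not alibi's internal sampler, and the model name is an arbitrary choice.

    from transformers import pipeline

    fill_mask = pipeline('fill-mask', model='distilbert-base-uncased')

    # Candidate replacements for a single masked position, most probable first.
    predictions = fill_mask('this is a [MASK] book .', top_k=3)
    print([p['token_str'] for p in predictions])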
""" # make sure that frac_mask_templates is in [0, 1] @@ -758,18 +764,18 @@ def _append_tail(self, raw: np.ndarray) -> np.ndarray: def _joiner(self, arr: np.ndarray, dtype: Optional[Type[np.generic]] = None) -> np.ndarray: """ - Function to concatenate an np.array of strings along a specified axis. + Function to concatenate an `numpy` array of strings along a specified axis. Parameters ---------- arr - 1D numpy array of strings. + 1D `numpy` array of strings. dtype - Array type, used to avoid truncation of strings when concatenating along axis. + Array type, used to avoid truncation of strings when concatenating along axis. Returns ------- - Array with one element, the concatenation of the strings in the input array. + Array with one element, the concatenation of the strings in the input array. """ filtered_arr = list(filter(lambda x: len(x) > 0, arr)) str_arr = self.model.tokenizer.convert_tokens_to_string(filtered_arr) @@ -795,8 +801,7 @@ def fill_mask(self, raw Array of mask templates. data - Binary mask having 0 where the word was masked. Note that - the width of the data array is equal to the length of id_samples. + Binary mask having 0 where the word was masked. num_samples Number of samples to be drawn. top_n: @@ -804,12 +809,14 @@ def fill_mask(self, batch_size_lm: Batch size used for language model. filling - Method to fill masked words. Either `parallel` or `autoregressive`. + Method to fill masked words. Either ``'parallel'`` or ``'autoregressive'``. + **kwargs + Other paremeters to be passed to other methods. Returns ------- raw - Array containing num_samples elements. Each element is a perturbed sentence. + Array containing `num_samples` elements. Each element is a perturbed sentence. """ # chose the perturbation function perturb_func = self._perturb_instances_parallel if filling == self.FILLING_PARALLEL \ @@ -827,10 +834,10 @@ def fill_mask(self, def _remove_subwords(self, raw: np.ndarray, row: int, col: int, punctuation: str = '', **kwargs) -> np.ndarray: """ - Deletes the subwords that follow a given token identified by the (row, col) pair in the `raw` matrix. + Deletes the subwords that follow a given token identified by the `(row, col)` pair in the `raw` matrix. A token is considered to be part of a word if is not a punctuation and if has the subword prefix specific to the used language model. The subwords are not actually deleted in, but they are replace - by the empty string `` + by the empty string ``''``. Parameters ---------- @@ -889,15 +896,15 @@ def _perturb_instances_parallel(self, Sample weight hyper-parameter. use_proba Bool whether to sample according to the predicted words distribution + **kwargs + Other arguments. Not used. Returns ------- sampled_tokens - Tensor containing the ids of the sampled tokens. - Has `num_samples` rows. + Array containing the ids of the sampled tokens. Has `num_samples` rows. sampled_data - Binary array having 0 where the tokens were masked. - Has `num_samples` rows. + Binary array having 0 where the tokens were masked. Has `num_samples` rows. """ # tokenize instances tokens_plus = self.model.tokenizer.batch_encode_plus(list(raw), padding=True, return_tensors='tf') @@ -973,6 +980,7 @@ def _perturb_instance_ar(self, """ Perturb the instances in an autoregressive fashion (sequential). + Parameters ---------- num_samples Number of samples to be generated. @@ -988,15 +996,15 @@ def _perturb_instance_ar(self, Sample weight hyper-parameter. use_proba Bool whether to sample according to the predicted words distribution. 
+ **kwargs + Other arguments. Not used. Returns ------- sampled_tokens - Tensor containing the ids of the sampled tokens. - Has `num_samples` rows. + Array containing the ids of the sampled tokens. Has `num_samples` rows. sampled_data - Binary array havin 0 where the tokens were masked. - Has `num_samples` rows. + Binary array having 0 where the tokens were masked. Has `num_samples` rows. """ # number of samples to generate per mask template assert num_samples == raw.shape[0] @@ -1058,7 +1066,7 @@ def _perturb_instance_ar(self, def set_data_type(self) -> None: """ - Working with numpy arrays of strings requires setting the data type to avoid + Working with `numpy` arrays of strings requires setting the data type to avoid truncating examples. This function estimates the longest sentence expected during the sampling process, which is used to set the number of characters for the samples and examples arrays. This depends on the perturbation method @@ -1089,9 +1097,9 @@ def set_data_type(self) -> None: "sample_proba": 0.5 } """ -Default perturbation options for `unknown` sampling +Default perturbation options for ``'unknown'`` sampling - - ``'sample_proba'``: float, probability of a word to be masked. + - ``'sample_proba'`` : ``float`` - probability of a word to be masked. """ DEFAULT_SAMPLING_SIMILARITY = { @@ -1101,15 +1109,15 @@ def set_data_type(self) -> None: "use_proba": False } """ -Default perturbation options for `similarity` sampling +Default perturbation options for ``'similarity'`` sampling - - ``'sample_proba'``: float, probability of a word to be masked. + - ``'sample_proba'`` : ``float`` - probability of a word to be masked. - - ``'top_n'``: int, number of similar words to sample for perturbations + - ``'top_n'`` : ``int`` - number of similar words to sample for perturbations. - - ``'temperature'``: float, sample weight hyper-parameter if `use_proba` equals `True`. + - ``'temperature'`` : ``float`` - sample weight hyper-parameter if `use_proba=True`. - - ``'use_proba'``: bool, whether to sample according to the words similarity. + - ``'use_proba'`` : ``bool`` - whether to sample according to the words similarity. """ DEFAULT_SAMPLING_LANGUAGE_MODEL = { @@ -1125,48 +1133,48 @@ def set_data_type(self) -> None: "sample_punctuation": False, } """ -Default perturbation options for `similarity` sampling +Default perturbation options for ``'language_model'`` sampling - - ``'filling'``: str, filling method for language models. Allowed values: `parallel`, `autoregressive`. \ - `parallel` method corresponds to a single forward pass through the language model. The masked words are sampled \ - independently, according to the selected probability distribution (see `top_n`, `temperature`, `use_proba`). \ - `autoregressive` method fills the words one at the time. This corresponds to multiple forward passes through \ - the language model which is computationally expensive. + - ``'filling'`` : ``str`` - filling method for language models. Allowed values: ``'parallel'``, \ + ``'autoregressive'``. ``'parallel'`` method corresponds to a single forward pass through the language model. The \ + masked words are sampled independently, according to the selected probability distribution (see `top_n`, \ + `temperature`, `use_proba`). `autoregressive` method fills the words one at the time. This corresponds to \ + multiple forward passes through the language model which is computationally expensive. - - ``'sample_proba'``: float, probability of a word to be masked. 
+ - ``'sample_proba'`` : ``float`` - probability of a word to be masked. - - ``'top_n'``: int, number of similar words to sample for perturbations. + - ``'top_n'`` : ``int`` - number of similar words to sample for perturbations. - - ``'temperature'``: float, sample weight hyper-parameter if use_proba equals True. + - ``'temperature'`` : ``float`` - sample weight hyper-parameter if use_proba equals ``True``. - - ``'use_proba'``: bool, whether to sample according to the predicted words distribution. If set to `False`, \ - the `top_n` words are sampled uniformly at random. + - ``'use_proba'`` : ``bool`` - whether to sample according to the predicted words distribution. If set to \ + ``False``, the `top_n` words are sampled uniformly at random. - - ``frac_mask_template'``: float, fraction from the number of samples of mask templates to be generated. \ - In each sampling call, will generate int(`frac_mask_templates` * `num_samples`) masking templates. \ + - ``'frac_mask_template'`` : ``float`` - fraction from the number of samples of mask templates to be generated. \ + In each sampling call, will generate `int(frac_mask_templates * num_samples)` masking templates. \ Lower fraction corresponds to lower computation time since the batch fed to the language model is smaller. \ After the words' distributions is predicted for each mask, a total of `num_samples` will be generated by sampling \ - evenly from each template. Note that lower fraction might correspond to less diverse sample. A `sample_proba` \ - is `1` corresponds to masking each word. For this case only one masking template will be constructed. \ - A `filling` set to `autoregressive` will generate `num_samples` masking templates regardless of the value \ + evenly from each template. Note that lower fraction might correspond to less diverse sample. A `sample_proba=1` \ + corresponds to masking each word. For this case only one masking template will be constructed. \ + A `filling='autoregressive'` will generate `num_samples` masking templates regardless of the value \ of `frac_mask_templates`. - - ``batch_size_lm``: int, batch size used for the language model forward pass. + - ``'batch_size_lm'`` : ``int`` - batch size used for the language model forward pass. - - ``punctuation``: str, string of punctuation not to be masked. + - ``'punctuation'`` : ``str`` - string of punctuation not to be masked. - - ``stopwords``: List[str], list of words not to be masked. + - ``'stopwords'`` : ``List[str]`` - list of words not to be masked. - - ``sample_punctuation``: bool, whether to sample punctuation to fill the masked words. If `False`, the \ + - ``'sample_punctuation'`` : ``bool`` - whether to sample punctuation to fill the masked words. If ``False``, the \ punctuation defined in `punctuation` will not be sampled. """ class AnchorText(Explainer): # sampling methods - SAMPLING_UNKNOWN = 'unknown' - SAMPLING_SIMILARITY = 'similarity' - SAMPLING_LANGUAGE_MODEL = 'language_model' + SAMPLING_UNKNOWN = 'unknown' #: Unknown sampling strategy. + SAMPLING_SIMILARITY = 'similarity' #: Similarity sampling strategy. + SAMPLING_LANGUAGE_MODEL = 'language_model' #: Language model sampling strategy. # default params DEFAULTS: Dict[str, Dict] = { @@ -1195,25 +1203,31 @@ def __init__(self, Parameters ---------- predictor - A callable that takes a list of text strings representing N data points as inputs and returns N outputs. + A callable that takes a list of text strings representing `N` data points as inputs and returns `N` outputs. 
sampling_strategy - Perturbation distribution method. - `unknown` - replaces words with UNKs. - `similarity` - samples according to a similarity score with the corpus embeddings. - `language_model` - samples according the language model's output distributions. + Perturbation distribution method: + + - ``'unknown'`` - replaces words with UNKs. + + - ``'similarity'`` - samples according to a similarity score with the corpus embeddings. + + - ``'language_model'`` - samples according the language model's output distributions. + nlp - spaCy object when sampling method is `unknown` or `similarity`. + `spaCy` object when sampling method is ``'unknown'`` or ``'similarity'``. language_model Transformers masked language model. This is a model that it adheres to the `LanguageModel` interface we define in :py:class:`alibi.utils.lang_model.LanguageModel`. seed If set, ensure identical random streams. kwargs - Sampling parameters can be passed as `kwargs` depending on the `sampling_strategy`. - Check default parameters defined in: + Sampling arguments can be passed as `kwargs` depending on the `sampling_strategy`. + Check default arguments defined in: - :py:data:`alibi.explainers.anchor_text.DEFAULT_SAMPLING_UNKNOWN` + - :py:data:`alibi.explainers.anchor_text.DEFAULT_SAMPLING_SIMILARITY` + - :py:data:`alibi.explainers.anchor_text.DEFAULT_SAMPLING_LANGUAGE_MODEL` Raises @@ -1231,14 +1245,15 @@ def __init__(self, # define model which can be either spacy object or LanguageModel # the initialization of the model happens in _validate_kwargs - self.model: Union['spacy.language.Language', LanguageModel] + self.model: Union['spacy.language.Language', LanguageModel] #: Language model to be used. # validate kwargs self.perturb_opts, all_opts = self._validate_kwargs(sampling_strategy=sampling_strategy, nlp=nlp, language_model=language_model, **kwargs) # set perturbation - self.perturbation: Any = self.CLASS_SAMPLER[self.sampling_strategy](self.model, self.perturb_opts) + self.perturbation: Any = \ + self.CLASS_SAMPLER[self.sampling_strategy](self.model, self.perturb_opts) #: Perturbation method. # update metadata self.meta['params'].update(seed=seed) @@ -1310,28 +1325,36 @@ def sampler(self, anchor: Tuple[int, tuple], num_samples: int, compute_labels: b Parameters ---------- anchor - int: the position of the anchor in the input batch - tuple: the anchor itself, a list of words to be kept unchanged + - ``int`` - the position of the anchor in the input batch. + + - ``tuple`` - the anchor itself, a list of words to be kept unchanged. + num_samples Number of generated perturbed samples. compute_labels - If True, an array of comparisons between predictions on perturbed samples and + If ``True``, an array of comparisons between predictions on perturbed samples and instance to be explained is returned. 
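A minimal end-to-end sketch of `AnchorText` with the ``'unknown'`` sampling strategy follows; the rule-based predictor and the example sentence are placeholders, and a spaCy model is assumed to be installed. Sampling options such as `sample_proba` are passed as keyword arguments, as described above.

    import numpy as np
    import spacy
    from alibi.explainers import AnchorText

    nlp = spacy.load('en_core_web_md')      # assumed to be installed

    # Placeholder predictor: labels a batch of strings with a simple keyword rule.
    def predictor(texts):
        return np.array([int('good' in t.lower()) for t in texts])

    explainer = AnchorText(
        predictor=predictor,
        sampling_strategy='unknown',        # replace words with UNKs
        nlp=nlp,
        sample_proba=0.5,                   # option from DEFAULT_SAMPLING_UNKNOWN
    )

    explanation = explainer.explain('This is a good book .', threshold=0.95)
    print(explanation.anchor, explanation.precision, explanation.coverage)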
Returns ------- - If compute_labels=True, a list containing the following is returned: - - covered_true: perturbed examples where the anchor applies and the model prediction - on perturbation is the same as the instance prediction - - covered_false: perturbed examples where the anchor applies and the model prediction - is NOT the same as the instance prediction - - labels: num_samples ints indicating whether the prediction on the perturbed sample - matches (1) the label of the instance to be explained or not (0) - - data: Matrix with 1s and 0s indicating whether a word in the text has been - perturbed for each sample - - 1.0: indicates exact coverage is not computed for this algorithm - - anchor[0]: position of anchor in the batch request - Otherwise, a list containing the data matrix only is returned. + If ``compute_labels=True``, a list containing the following is returned + + - `covered_true` - perturbed examples where the anchor applies and the model prediction \ + on perturbation is the same as the instance prediction. + + - `covered_false` - perturbed examples where the anchor applies and the model prediction \ + is NOT the same as the instance prediction. + + - `labels` - num_samples ints indicating whether the prediction on the perturbed sample \ + matches (1) the label of the instance to be explained or not (0). + + - `data` - Matrix with 1s and 0s indicating whether a word in the text has been perturbed for each sample. + + - `-1.0` - indicates exact coverage is not computed for this algorithm. + + - `anchor[0]` - position of anchor in the batch request. + + Otherwise, a list containing the data matrix only is returned. """ raw_data, data = self.perturbation(anchor[1], num_samples) @@ -1360,7 +1383,7 @@ def compare_labels(self, samples: np.ndarray) -> np.ndarray: Returns ------- - A boolean array indicating whether the prediction was the same as the instance label. + A `numpy` boolean array indicating whether the prediction was the same as the instance label. """ return self.predictor(samples.tolist()) == self.instance_label @@ -1391,7 +1414,7 @@ def explain(self, # type: ignore[override] threshold Minimum precision threshold. delta - Used to compute beta. + Used to compute `beta`. tau Margin between lower confidence bound and minimum precision or upper bound. batch_size @@ -1401,7 +1424,7 @@ def explain(self, # type: ignore[override] beam_size Number of options kept after each stage of anchor building. stop_on_first - If True, the beam search algorithm will return the first anchor that has satisfies the + If ``True``, the beam search algorithm will return the first anchor that has satisfies the probability constraint. max_anchor_size Maximum number of features to include in an anchor. @@ -1411,22 +1434,29 @@ def explain(self, # type: ignore[override] How many examples where anchors apply to store for each anchor sampled during search (both examples where prediction on samples agrees/disagrees with predicted label are stored). binary_cache_size - The anchor search pre-allocates binary_cache_size batches for storing the boolean arrays + The anchor search pre-allocates `binary_cache_size` batches for storing the boolean arrays returned during sampling. cache_margin - When only max(cache_margin, batch_size) positions in the binary cache remain empty, a new cache + When only ``max(cache_margin, batch_size)`` positions in the binary cache remain empty, a new cache of the same size is pre-allocated to continue buffering samples. 
- kwargs - Other keyword arguments passed to the anchor beam search and the text sampling and perturbation functions. verbose Display updates during the anchor search iterations. verbose_every Frequency of displayed iterations during anchor search process. + **kwargs + Other keyword arguments passed to the anchor beam search and the text sampling and perturbation functions. Returns ------- - explanation - `Explanation` object containing the anchor explaining the instance with additional metadata as attributes. + `Explanation` object containing the anchor explaining the instance with additional metadata as attributes. \ + Contains the following data-related attributes + + - `anchor` : ``List[str]`` - a list of words in the proposed anchor. + + - `precision` : ``float`` - the fraction of times the sampled instances where the anchor holds yields \ + the same prediction as the original instance. The precision will always be threshold for a valid anchor. + + - `coverage` : ``float`` - the fraction of sampled instances the anchor applies to. """ # get params for storage in meta params = locals() @@ -1482,10 +1512,11 @@ def explain(self, # type: ignore[override] # set mab self.mab = mab - return self.build_explanation(text, result, self.instance_label, params) + return self._build_explanation(text, result, self.instance_label, params) - def build_explanation(self, text: str, result: dict, predicted_label: int, params: dict) -> Explanation: - """ Uses the metadata returned by the anchor search algorithm together with + def _build_explanation(self, text: str, result: dict, predicted_label: int, params: dict) -> Explanation: + """ + Uses the metadata returned by the anchor search algorithm together with the instance to be explained to build an explanation object. Parameters @@ -1497,7 +1528,7 @@ def build_explanation(self, text: str, result: dict, predicted_label: int, param predicted_label Label of the instance to be explained. Inferred if not received. params - Parameters passed to `explain`. + Arguments passed to `explain`. """ result['instance'] = text @@ -1541,6 +1572,14 @@ def _transform_predictor(self, predictor: Callable) -> Callable: return transformer def reset_predictor(self, predictor: Callable) -> None: + """ + Resets the predictor function. + + Parameters + ---------- + predictor + New predictor function. + """ self.predictor = self._transform_predictor(predictor) def _seed(self, seed: int) -> None: diff --git a/alibi/explainers/backends/cfrl_base.py b/alibi/explainers/backends/cfrl_base.py index 136311a34..a35717f27 100644 --- a/alibi/explainers/backends/cfrl_base.py +++ b/alibi/explainers/backends/cfrl_base.py @@ -33,10 +33,6 @@ def generate_empty_condition(X: Any) -> None: ---------- X Input instance. - - Returns - -------- - None """ return None @@ -56,7 +52,7 @@ def get_classification_reward(Y_pred: np.ndarray, Y_true: np.ndarray): Returns ------- - Classification reward per instance. 1 if the most likely classes match, 0 otherwise. + Classification reward per instance. 1 if the most likely classes match, 0 otherwise. """ if len(Y_pred.shape) != 2: raise ValueError("Prediction labels should be a 2D array for classification task.") @@ -82,7 +78,7 @@ def get_hard_distribution(Y: np.ndarray, num_classes: Optional[int] = None) -> n Returns ------- - Hard label distribution (one-hot encoding). + Hard label distribution (one-hot encoding). 
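The backend helpers documented above can be combined as in the sketch below. The predictor is a placeholder that returns fixed class probabilities; the import path and argument names follow this patch.

    import numpy as np
    from alibi.explainers.backends.cfrl_base import (
        get_classification_reward, get_hard_distribution, predict_batches)

    Y_true = np.array([0, 2, 1, 2, 0, 1, 2, 0, 1, 2])
    Y_hard = get_hard_distribution(Y_true, num_classes=3)   # one-hot encoding, shape (10, 3)

    # Placeholder predictor returning class probabilities for a batch of instances.
    def predictor(X: np.ndarray) -> np.ndarray:
        return np.tile([0.1, 0.2, 0.7], (X.shape[0], 1))

    X = np.random.rand(10, 4).astype(np.float32)
    Y_pred = predict_batches(X, predictor=predictor, batch_size=4)

    # 1 where the most likely predicted class matches the label, 0 otherwise.
    reward = get_classification_reward(Y_pred, Y_hard)
    print(reward.mean())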
""" if len(Y.shape) == 1 or (len(Y.shape) == 2 and Y.shape[1] == 1): if num_classes is None: @@ -118,7 +114,7 @@ def predict_batches(X: np.ndarray, predictor: Callable, batch_size: int) -> np.n Returns ------- - Classification labels. + Classification labels. """ n_minibatch = int(np.ceil(X.shape[0] / batch_size)) Y_m = [] diff --git a/alibi/explainers/backends/cfrl_tabular.py b/alibi/explainers/backends/cfrl_tabular.py index da7e037bb..feb77e899 100644 --- a/alibi/explainers/backends/cfrl_tabular.py +++ b/alibi/explainers/backends/cfrl_tabular.py @@ -30,7 +30,7 @@ def get_conditional_dim(feature_names: List[str], category_map: Dict[int, List[s Returns ------- - Dimension of the conditional vector + Dimension of the conditional vector """ cat_feat = int(np.sum([len(vals) for vals in category_map.values()])) num_feat = len(feature_names) - len(category_map) @@ -55,7 +55,7 @@ def split_ohe(X_ohe: 'Union[np.ndarray, torch.Tensor, tf.Tensor]', Returns ------- X_ohe_num_split - List of numerical heads. If different than `None`, the list's size is 1. + List of numerical heads. If different than ``None``, the list's size is 1. X_ohe_cat_split List of categorical one-hot encoded heads. """ @@ -92,13 +92,13 @@ def generate_numerical_condition(X_ohe: np.ndarray, """ Generates numerical features conditional vector. For numerical features with a minimum value `a_min` and a maximum value `a_max`, we include in the conditional vector the values `-p_min`, `p_max`, where `p_min, p_max` - are in `[0, 1]`. The range `[-p_min, p_max]` encodes a shift and scale-invariant representation of the interval + are in [0, 1]. The range `[-p_min, p_max]` encodes a shift and scale-invariant representation of the interval `[a - p_min * (a_max - a_min), a + p_max * (a_max - a_min)], where `a` is the original feature value. During training, `p_min` and `p_max` are sampled from `Beta(2, 2)` for each unconstrained feature. Immutable features can be encoded by `p_min = p_max = 0` or listed in `immutable_features` list. Features allowed to increase or - decrease only correspond to setting `p_min = 0` or `p_max = 0`, respectively. For example, allowing the `age` + decrease only correspond to setting `p_min = 0` or `p_max = 0`, respectively. For example, allowing the ``'Age'`` feature to increase by up to 5 years is encoded by taking `p_min = 0`, `p_max=0.1`, assuming the minimum age of - `10` and the maximum age of `60` years in the training set: `5 = 0.1 * (60 - 10)`. + 10 and the maximum age of 60 years in the training set: `5 = 0.1 * (60 - 10)`. Parameters ---------- @@ -115,15 +115,15 @@ def generate_numerical_condition(X_ohe: np.ndarray, Dictionary of ranges for numerical features. Each value is a list containing two elements, first one negative and the second one positive. immutable_features - Dictionary of immutable features. The keys are the column indexes and the values are booleans: `True` if - the feature is immutable, `False` otherwise. + Dictionary of immutable features. The keys are the column indexes and the values are booleans: ``True`` if + the feature is immutable, ``False`` otherwise. conditional - Boolean flag to generate a conditional vector. If `False` the conditional vector does not impose any + Boolean flag to generate a conditional vector. If ``False`` the conditional vector does not impose any restrictions on the feature value. Returns ------- - Conditional vector for numerical features. + Conditional vector for numerical features. 
""" num_cond = [] size = X_ohe.shape[0] @@ -167,8 +167,8 @@ def generate_categorical_condition(X_ohe: np.ndarray, Generates categorical features conditional vector. For a categorical feature of cardinality `K`, we condition the subset of allowed feature through a binary mask of dimension `K`. When training the counterfactual generator, the mask values are sampled from `Bern(0.5)`. For immutable features, only the original input feature value is - set to one in the binary mask. For example, the immutability of the `marital_status` having the current - value `married` is encoded through the binary sequence `[1, 0, 0]`, given an ordering of the possible feature + set to one in the binary mask. For example, the immutability of the ``'marital_status'`` having the current + value ``'married'`` is encoded through the binary sequence [1, 0, 0], given an ordering of the possible feature values `[married, unmarried, divorced]`. Parameters @@ -185,12 +185,12 @@ def generate_categorical_condition(X_ohe: np.ndarray, immutable_features List of immutable features. conditional - Boolean flag to generate a conditional vector. If `False` the conditional vector does not impose any + Boolean flag to generate a conditional vector. If ``False`` the conditional vector does not impose any restrictions on the feature value. Returns ------- - Conditional vector for categorical feature. + Conditional vector for categorical feature. """ C_cat = [] # define list of conditional vector for each feature @@ -237,8 +237,8 @@ def generate_condition(X_ohe: np.ndarray, One-hot encoding representation of the element(s) for which the conditional vector will be generated. This method assumes that the input array, `X_ohe`, is has the first columns corresponding to the numerical features, and the rest are one-hot encodings of the categorical columns. The numerical and the - categorical columns are ordered by the original column index( e.g. numerical = (1, 4), - categorical=(0, 2, 3)). + categorical columns are ordered by the original column index( e.g., `numerical = (1, 4)`, + `categorical=(0, 2, 3)`). feature_names List of feature names. category_map @@ -250,12 +250,12 @@ def generate_condition(X_ohe: np.ndarray, immutable_features List of immutable map features. conditional - Boolean flag to generate a conditional vector. If `False` the conditional vector does not impose any + Boolean flag to generate a conditional vector. If ``False`` the conditional vector does not impose any restrictions on the feature value. Returns ------- - Conditional vector. + Conditional vector. """ # Define conditional vector buffer C = [] @@ -306,7 +306,7 @@ def sample_numerical(X_hat_num_split: List[np.ndarray], stats Dictionary of statistic of the training data. Contains the minimum and maximum value of each numerical feature in the training set. Each key is an index of the column and each value is another dictionary - containing `min` and `max` keys. + containing ``'min'`` and ``'max'`` keys. Returns ------- @@ -392,12 +392,12 @@ def sample(X_hat_split: List[np.ndarray], stats Dictionary of statistic of the training data. Contains the minimum and maximum value of each numerical feature in the training set. Each key is an index of the column and each value is another dictionary - containing `min` and `max` keys. + containing ``'min'`` and ``'max'`` keys. 
Returns ------- X_ohe_hat_split - Most probable reconstruction sample according to the autoencoder, sampled according to the conditional vector + Most probable reconstruction sample according to the auto-encoder, sampled according to the conditional vector and the dictionary of statistics. This method assumes that the input array, `X_ohe` , has the first columns corresponding to the numerical features, and the rest are one-hot encodings of the categorical columns. """ @@ -549,7 +549,7 @@ def get_statistics(X: np.ndarray, Returns ------- - Dictionary of statistics. For each numerical column, the minimum and maximum value is returned. + Dictionary of statistics. For each numerical column, the minimum and maximum value is returned. """ stats = dict() @@ -577,9 +577,9 @@ def get_numerical_conditional_vector(X: np.ndarray, diverse=False) -> List[np.ndarray]: """ Generates a conditional vector. The condition is expressed a a delta change of the feature. - For numerical features, if the `Age` feature is allowed to increase up to 10 more years, the delta change is - `[0, 10]`. If the `Hours per week` is allowed to decrease down to `-5` and increases up to `+10`, then the - delta change is `[-5, +10]`. Note that the interval must go include `0`. + For numerical features, if the ``'Age'`` feature is allowed to increase up to 10 more years, the delta change is + [0, 10]. If the ``'Hours per week'`` is allowed to decrease down to -5 and increases up to +10, then the + delta change is [-5, +10]. Note that the interval must go include 0. Parameters ---------- @@ -597,11 +597,11 @@ def get_numerical_conditional_vector(X: np.ndarray, List of feature names. This should be provided by the dataset. category_map Dictionary of category mapping. The keys are column indexes and the values are lists containing the - possible feature values. This should be provided by the dataset. + possible feature values. This should be provided by the dataset. stats Dictionary of statistic of the training data. Contains the minimum and maximum value of each numerical feature in the training set. Each key is an index of the column and each value is another dictionary - containing `min` and `max` keys. + containing ``'min'`` and ``'max'`` keys. ranges Dictionary of ranges for numerical feature. Each value is a list containing two elements, first one negative and the second one positive. @@ -613,7 +613,7 @@ def get_numerical_conditional_vector(X: np.ndarray, Returns ------- - List of conditional vectors for each numerical feature. + List of conditional vectors for each numerical feature. """ if ranges is None: ranges = dict() @@ -693,8 +693,9 @@ def get_categorical_conditional_vector(X: np.ndarray, diverse=False) -> List[np.ndarray]: """ Generates a conditional vector. The condition is expressed a a delta change of the feature. - For categorical feature, if the `Occupation` can change to `Blue-Collar` or `White-Collar` the delta change - is `['Blue-Collar', 'White-Collar']`. Note that the original value is optional as it is included by default. + For categorical feature, if the ``'Occupation'`` can change to ``'Blue-Collar'`` or ``'White-Collar'``, the delta + change is ``['Blue-Collar', 'White-Collar']``. Note that the original value is optional as it is + included by default. Parameters ---------- @@ -721,7 +722,7 @@ def get_categorical_conditional_vector(X: np.ndarray, Returns ------- - List of conditional vectors for each categorical feature. + List of conditional vectors for each categorical feature. 
""" if immutable_features is None: immutable_features = list() @@ -774,12 +775,13 @@ def get_conditional_vector(X: np.ndarray, """ Generates a conditional vector. The condition is expressed a a delta change of the feature. - For numerical features, if the `Age` feature is allowed to increase up to 10 more years, the delta change is - `[0, 10]`. If the `Hours per week` is allowed to decrease down to `-5` and increases up to `+10`, then the - delta change is `[-5, +10]`. Note that the interval must go include `0`. + For numerical features, if the ``'Age'`` feature is allowed to increase up to 10 more years, the delta change is + [0, 10]. If the ``'Hours per week'`` is allowed to decrease down to -5 and increases up to +10, then the + delta change is [-5, +10]. Note that the interval must go include 0. - For categorical feature, if the `Occupation` can change to `Blue-Collar` or `White-Collar` the delta change - is `['Blue-Collar', 'White-Collar']`. Note that the original value is optional as it is included by default. + For categorical feature, if the ``'Occupation'`` can change to ``'Blue-Collar'`` or ``'White-Collar'``, + the delta change is ``['Blue-Collar', 'White-Collar']``. Note that the original value is optional as it is + included by default. Parameters ---------- @@ -801,7 +803,7 @@ def get_conditional_vector(X: np.ndarray, stats Dictionary of statistic of the training data. Contains the minimum and maximum value of each numerical feature in the training set. Each key is an index of the column and each value is another dictionary - containing `min` and `max` keys. + containing ``'min'`` and ``'max'`` keys. ranges Dictionary of ranges for numerical feature. Each value is a list containing two elements, first one negative and the second one positive. @@ -813,7 +815,7 @@ def get_conditional_vector(X: np.ndarray, Returns ------- - Conditional vector. + Conditional vector. """ if ranges is None: ranges = dict() @@ -867,7 +869,7 @@ def apply_category_mapping(X: np.ndarray, category_map: Dict[int, List[str]]) -> Returns ------- - Transformed array. + Transformed array. """ pd_X = pd.DataFrame(X) diff --git a/alibi/explainers/backends/pytorch/cfrl_base.py b/alibi/explainers/backends/pytorch/cfrl_base.py index 6c240595e..f9c25ca28 100644 --- a/alibi/explainers/backends/pytorch/cfrl_base.py +++ b/alibi/explainers/backends/pytorch/cfrl_base.py @@ -38,7 +38,8 @@ def __init__(self, Array of input instances. The input should NOT be preprocessed as it will be preprocessed when calling the `preprocessor` function. preprocessor - Preprocessor function. This function correspond to the preprocessing steps applied to the autoencoder model. + Preprocessor function. This function correspond to the preprocessing steps applied to + the auto-encoder model. predictor Prediction function. The classifier function should expect the input in the original format and preprocess it internally in the `predictor` if necessary. @@ -101,22 +102,22 @@ def __getitem__(self, idx) -> Dict[str, np.ndarray]: def get_device() -> torch.device: """ - Checks if cuda is available. If available, use cuda by default, else use cpu. + Checks if `cuda` is available. If available, use `cuda` by default, else use `cpu`. Returns ------- - Device to be used. + Device to be used. """ return torch.device("cuda" if torch.cuda.is_available() else "cpu") def get_optimizer(model: nn.Module, lr: float = 1e-3) -> torch.optim.Optimizer: """ - Constructs default Adam optimizer. + Constructs default `Adam` optimizer. 
Returns ------- - Default optimizer. + Default optimizer. """ return torch.optim.Adam(model.parameters(), lr=lr) @@ -134,7 +135,7 @@ def get_actor(hidden_dim: int, output_dim: int) -> nn.Module: Returns ------- - Actor network. + Actor network. """ return Actor(hidden_dim=hidden_dim, output_dim=output_dim) @@ -150,7 +151,7 @@ def get_critic(hidden_dim: int) -> nn.Module: Returns ------- - Critic network. + Critic network. """ return Critic(hidden_dim=hidden_dim) @@ -162,13 +163,13 @@ def sparsity_loss(X_hat_cf: torch.Tensor, X: torch.Tensor) -> Dict[str, torch.Te Parameters ---------- X_hat_cf - Autoencoder counterfactual reconstruction. + Auto-encoder counterfactual reconstruction. X Input instance Returns ------- - L1 sparsity loss. + L1 sparsity loss. """ return {"sparsity_loss": F.l1_loss(X_hat_cf, X)} @@ -186,7 +187,7 @@ def consistency_loss(Z_cf_pred: torch.Tensor, Z_cf_tgt: torch.Tensor): Returns ------- - 0 consistency loss. + 0 consistency loss. """ return {"consistency_loss": 0} @@ -208,8 +209,8 @@ def data_generator(X: np.ndarray, Array of input instances. The input should NOT be preprocessed as it will be preprocessed when calling the `preprocessor` function. encoder_preprocessor - Preprocessor function. This function correspond to the preprocessing steps applied to the encoder/autoencoder - model. + Preprocessor function. This function correspond to the preprocessing steps applied to the + encoder/auto-encoder model. predictor Prediction function. The classifier function should expect the input in the original format and preprocess it internally in the `predictor` if necessary. @@ -220,9 +221,11 @@ def data_generator(X: np.ndarray, Dimension of the batch used during training. The same batch size is used to infer the classification labels of the input dataset. shuffle - Whether to shuffle the dataset each epoch. `True` by default. + Whether to shuffle the dataset each epoch. ``True`` by default. num_workers Number of worker processes to be created. + **kwargs + Other arguments. Not used. """ dataset = PtCounterfactualRLDataset(X=X, preprocessor=encoder_preprocessor, predictor=predictor, conditional_func=conditional_func, batch_size=batch_size) @@ -268,7 +271,7 @@ def decode(Z: torch.Tensor, decoder: nn.Module, device: torch.device, **kwargs): Returns ------- - Embedding tensor decoding. + Embedding tensor decoding. """ decoder.eval() return decoder(Z.float().to(device)) @@ -441,10 +444,12 @@ def update_actor_critic(encoder: nn.Module, Noised counterfactual reward. device Torch device object. + **kwargs + Other arguments. Not used. Returns ------- - Dictionary of losses. + Dictionary of losses. """ # Set autoencoder to evaluation mode. encoder.eval() @@ -526,16 +531,16 @@ def update_actor_critic(encoder: nn.Module, def to_numpy(X: Optional[Union[List, np.ndarray, torch.Tensor]]) -> Optional[Union[List, np.ndarray]]: """ - Converts given tensor to numpy array. + Converts given tensor to `numpy` array. Parameters ---------- X - Input tensor to be converted to numpy array. + Input tensor to be converted to `numpy` array. Returns ------- - Numpy representation of the input tensor. + `Numpy` representation of the input tensor. 
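As a quick illustration of the L1 sparsity loss documented above, a minimal standalone equivalent (mirroring the `F.l1_loss` call shown in this file; the toy tensors are assumptions):

import torch
import torch.nn.functional as F

def sparsity_loss(X_hat_cf: torch.Tensor, X: torch.Tensor) -> dict:
    # Mean absolute difference between the counterfactual reconstruction and the input.
    return {"sparsity_loss": F.l1_loss(X_hat_cf, X)}

X = torch.randn(4, 8)
X_hat_cf = X + 0.1 * torch.randn(4, 8)
print(sparsity_loss(X_hat_cf, X))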
""" if X is not None: if isinstance(X, np.ndarray): @@ -554,11 +559,11 @@ def to_numpy(X: Optional[Union[List, np.ndarray, torch.Tensor]]) -> Optional[Uni def to_tensor(X: Union[np.ndarray, torch.Tensor], device: torch.device, **kwargs) -> Optional[torch.Tensor]: """ - Converts tensor to torch.Tensor + Converts tensor to `torch.Tensor` Returns ------- - torch.Tensor conversion. + `torch.Tensor` conversion. """ if X is not None: if isinstance(X, torch.Tensor): @@ -594,7 +599,7 @@ def load_model(path: Union[str, os.PathLike]) -> nn.Module: Returns ------- - Loaded model. + Loaded model. """ model = torch.load(path) model.eval() @@ -603,12 +608,12 @@ def load_model(path: Union[str, os.PathLike]) -> nn.Module: def set_seed(seed: int = 13): """ - Sets a seed to ensure reproducibility + Sets a seed to ensure reproducibility. Parameters ---------- seed - seed to be set + Seed to be set. """ # Others np.random.seed(seed) diff --git a/alibi/explainers/backends/pytorch/cfrl_tabular.py b/alibi/explainers/backends/pytorch/cfrl_tabular.py index bc99281f5..c788b1322 100644 --- a/alibi/explainers/backends/pytorch/cfrl_tabular.py +++ b/alibi/explainers/backends/pytorch/cfrl_tabular.py @@ -35,7 +35,7 @@ def sample_differentiable(X_hat_split: List[torch.Tensor], Returns ------- - Differentiable reconstruction. + Differentiable reconstruction. """ num_attr = len(X_hat_split) - len(category_map) cat_attr = len(category_map) @@ -72,11 +72,11 @@ def l0_ohe(input: torch.Tensor, target Target tensor reduction - Specifies the reduction to apply to the output: `none` | `mean` | `sum`. + Specifies the reduction to apply to the output: ``'none'`` | ``'mean'`` | ``'sum'``. Returns ------- - L0 loss. + L0 loss. """ # Order matters as the gradient of zeros will still flow if reversed order. Maybe consider clipping a bit higher? eps = 1e-7 / input.shape[1] @@ -105,11 +105,11 @@ def l1_loss(input: torch.Tensor, target: torch.Tensor, reduction: str = 'none') target Target tensor. reduction - Specifies the reduction to apply to the output: `none` | `mean` | `sum`. + Specifies the reduction to apply to the output: ``'none'`` | ``'mean'`` | ``'sum'``. Returns ------- - L1 loss. + L1 loss. """ return F.l1_loss(input=input, target=target, reduction=reduction) @@ -138,7 +138,7 @@ def sparsity_loss(X_hat_split: List[torch.Tensor], Returns ------- - Heterogeneous sparsity loss. + Heterogeneous sparsity loss. """ # Split the input into a list of tensor, where each element corresponds to a network head X_ohe_num_split, X_ohe_cat_split = split_ohe(X_ohe=X_ohe, @@ -183,7 +183,7 @@ def consistency_loss(Z_cf_pred: torch.Tensor, Z_cf_tgt: torch.Tensor, **kwargs): Returns ------- - Heterogeneous consistency loss. + Heterogeneous consistency loss. """ # Compute consistency loss loss = F.mse_loss(Z_cf_pred, Z_cf_tgt) diff --git a/alibi/explainers/backends/tensorflow/cfrl_base.py b/alibi/explainers/backends/tensorflow/cfrl_base.py index e3dd49faa..3e59dda98 100644 --- a/alibi/explainers/backends/tensorflow/cfrl_base.py +++ b/alibi/explainers/backends/tensorflow/cfrl_base.py @@ -37,7 +37,7 @@ def __init__(self, the `preprocessor` function. preprocessor Preprocessor function. This function correspond to the preprocessing steps applied to the - encoder/autoencoder model. + encoder/auto-encoder model. predictor Prediction function. The classifier function should expect the input in the original format and preprocess it internally in the `predictor` if necessary. @@ -48,7 +48,7 @@ def __init__(self, Dimension of the batch used during training. 
The same batch size is used to infer the classification labels of the input dataset. shuffle - Whether to shuffle the dataset each epoch. `True` by default. + Whether to shuffle the dataset each epoch. ``True`` by default. """ super().__init__() @@ -113,7 +113,7 @@ def __getitem__(self, idx) -> Dict[str, np.ndarray]: def get_optimizer(model: Optional[keras.layers.Layer] = None, lr: float = 1e-3) -> keras.optimizers.Optimizer: """ - Constructs default Adam optimizer. + Constructs default `Adam` optimizer. Parameters ---------- @@ -124,7 +124,7 @@ def get_optimizer(model: Optional[keras.layers.Layer] = None, lr: float = 1e-3) Returns ------- - Default optimizer. + Default optimizer. """ return keras.optimizers.Adam(learning_rate=lr) @@ -142,7 +142,7 @@ def get_actor(hidden_dim: int, output_dim: int) -> keras.layers.Layer: Returns ------- - Actor network. + Actor network. """ return Actor(hidden_dim=hidden_dim, output_dim=output_dim) @@ -158,7 +158,7 @@ def get_critic(hidden_dim: int) -> keras.layers.Layer: Returns ------- - Critic network. + Critic network. """ return Critic(hidden_dim=hidden_dim) @@ -170,13 +170,13 @@ def sparsity_loss(X_hat_cf: tf.Tensor, X: tf.Tensor) -> Dict[str, tf.Tensor]: Parameters ---------- X_hat_cf - Autoencoder counterfactual reconstruction. + Auto-encoder counterfactual reconstruction. X - Input instance + Input instance. Returns ------- - L1 sparsity loss. + L1 sparsity loss. """ return {"sparsity_loss": tf.reduce_mean(tf.abs(X_hat_cf - X))} @@ -194,7 +194,7 @@ def consistency_loss(Z_cf_pred: tf.Tensor, Z_cf_tgt: tf.Tensor): Returns ------- - 0 consistency loss. + 0 consistency loss. """ return {"consistency_loss": 0} @@ -207,7 +207,7 @@ def data_generator(X: np.ndarray, shuffle: bool = True, **kwargs): """ - Constructs a tensorflow data generator. + Constructs a `tensorflow` data generator. Parameters ---------- @@ -215,7 +215,7 @@ def data_generator(X: np.ndarray, Array of input instances. The input should NOT be preprocessed as it will be preprocessed when calling the `preprocessor` function. encoder_preprocessor - Preprocessor function. This function correspond to the preprocessing steps applied to the encoder/autoencoder + Preprocessor function. This function correspond to the preprocessing steps applied to the encoder/auto-encoder model. predictor Prediction function. The classifier function should expect the input in the original format and preprocess @@ -227,7 +227,9 @@ def data_generator(X: np.ndarray, Dimension of the batch used during training. The same batch size is used to infer the classification labels of the input dataset. shuffle - Whether to shuffle the dataset each epoch. `True` by default. + Whether to shuffle the dataset each epoch. ``True`` by default. + **kwargs + Other arguments. Not used. """ return TfCounterfactualRLDataset(X=X, preprocessor=encoder_preprocessor, predictor=predictor, conditional_func=conditional_func, batch_size=batch_size, shuffle=shuffle) @@ -243,10 +245,12 @@ def encode(X: Union[tf.Tensor, np.ndarray], encoder: keras.Model, **kwargs) -> t Input to be encoded. encoder Pretrained encoder network. + **kwargs + Other arguments. Not used. Returns ------- - Input encoding. + Input encoding. """ return encoder(X, training=False) @@ -261,10 +265,12 @@ def decode(Z: Union[tf.Tensor, np.ndarray], decoder: keras.Model, **kwargs): Embedding tensor to be decoded. decoder Pretrained decoder network. + **kwargs + Other arguments. Not used. Returns ------- - Embedding tensor decoding. + Embedding tensor decoding. 
""" return decoder(Z, training=False) @@ -290,6 +296,8 @@ def generate_cf(Z: Union[np.ndarray, tf.Tensor], Conditional tensor. actor Actor network. The model generates the counterfactual embedding. + **kwargs + Other arguments. Not used. Returns ------- @@ -335,6 +343,8 @@ def add_noise(Z_cf: Union[tf.Tensor, np.ndarray], exploration_steps Number of exploration steps. For the first `exploration_steps`, the noised counterfactual embedding is sampled uniformly at random. + **kwargs + Other arguments. Not used. Returns ------- @@ -388,6 +398,8 @@ def initialize_optimizers(optimizer_actor, optimizer_critic, actor, critic, **kw Actor model to be optimized. critic Critic model to be optimized. + **kwargs + Other arguments. Not used. """ initialize_optimizer(optimizer=optimizer_actor, model=actor) initialize_optimizer(optimizer=optimizer_critic, model=critic) @@ -413,6 +425,8 @@ def initialize_actor_critic(actor, critic, Z, Z_cf_tilde, Y_m, Y_t, C, **kwargs) Target counterfactual classification label. C Conditional tensor. + **kwargs + Other arguments. Not used. """ # Define zero data. Z = tf.zeros((1, *Z.shape[1:]), dtype=tf.float32) @@ -499,10 +513,12 @@ def update_actor_critic(encoder: keras.Model, Conditional tensor. R_tilde Noised counterfactual reward. + **kwargs + Other arguments. Not used. Returns ------- - Dictionary of losses. + Dictionary of losses. """ # Define dictionary of losses. losses: Dict[str, float] = dict() @@ -572,16 +588,16 @@ def update_actor_critic(encoder: keras.Model, def to_numpy(X: Optional[Union[List, np.ndarray, tf.Tensor]]) -> Optional[Union[List, np.ndarray]]: """ - Converts given tensor to numpy array. + Converts given tensor to `numpy` array. Parameters ---------- X - Input tensor to be converted to numpy array. + Input tensor to be converted to `numpy` array. Returns ------- - Numpy representation of the input tensor. + `Numpy` representation of the input tensor. """ if X is not None: if isinstance(X, np.ndarray): @@ -599,11 +615,18 @@ def to_numpy(X: Optional[Union[List, np.ndarray, tf.Tensor]]) -> Optional[Union[ def to_tensor(X: Union[np.ndarray, tf.Tensor], **kwargs) -> Optional[tf.Tensor]: """ - Converts tensor to tf.Tensor + Converts tensor to `tf.Tensor`. + + Parameters + ---------- + X + Input array/tensor to be converted. + **kwargs + Other arguments. Not used. Returns ------- - tf.Tensor conversion. + `tf.Tensor` conversion. """ if X is not None: if isinstance(X, tf.Tensor): @@ -639,7 +662,7 @@ def load_model(path: Union[str, os.PathLike]) -> keras.Model: Returns ------- - Loaded model. + Loaded model. """ return keras.models.load_model(path, compile=False) diff --git a/alibi/explainers/backends/tensorflow/cfrl_tabular.py b/alibi/explainers/backends/tensorflow/cfrl_tabular.py index b057efbd5..69dbf93fc 100644 --- a/alibi/explainers/backends/tensorflow/cfrl_tabular.py +++ b/alibi/explainers/backends/tensorflow/cfrl_tabular.py @@ -35,7 +35,7 @@ def sample_differentiable(X_hat_split: List[tf.Tensor], Returns ------- - Differentiable reconstruction. + Differentiable reconstruction. """ num_attr = len(X_hat_split) - len(category_map) cat_attr = len(category_map) @@ -72,11 +72,11 @@ def l0_ohe(input: tf.Tensor, target Target tensor reduction - Specifies the reduction to apply to the output: `none` | `mean` | `sum`. + Specifies the reduction to apply to the output: ``'none'`` | ``'mean'`` | ``'sum'``. Returns ------- - L0 loss. + L0 loss. """ # Order matters as the gradient of zeros will still flow if reversed order. Maybe consider clipping a bit higher? 
eps = 1e-7 / input.shape[1] @@ -105,11 +105,11 @@ def l1_loss(input: tf.Tensor, target=tf.Tensor, reduction: str = 'none') -> tf.T target Target tensor reduction - Specifies the reduction to apply to the output: `none` | `mean` | `sum`. + Specifies the reduction to apply to the output: ``'none'`` | ``'mean'`` | ``'sum'``. Returns ------- - L1 loss. + L1 loss. """ loss = tf.abs(input - target) @@ -149,7 +149,7 @@ def sparsity_loss(X_hat_split: List[tf.Tensor], Returns ------- - Heterogeneous sparsity loss. + Heterogeneous sparsity loss. """ # Split the input into a list of tensor, where each element corresponds to a network head X_ohe_num_split, X_ohe_cat_split = split_ohe(X_ohe=X_ohe, @@ -193,10 +193,9 @@ def consistency_loss(Z_cf_pred: tf.Tensor, Z_cf_tgt: Union[np.ndarray, tf.Tensor Z_cf_tgt Counterfactual embedding target. - Returns ------- - Heterogeneous consistency loss. + Heterogeneous consistency loss. """ # Compute consistency loss loss = tf.reduce_mean(tf.square(Z_cf_pred - Z_cf_tgt)) diff --git a/alibi/explainers/cem.py b/alibi/explainers/cem.py index cf477fad8..5681ff059 100644 --- a/alibi/explainers/cem.py +++ b/alibi/explainers/cem.py @@ -40,46 +40,46 @@ def __init__(self, Parameters ---------- predict - Keras or TensorFlow model or any other model's prediction function returning class probabilities + `tensorflow` model or any other model's prediction function returning class probabilities. mode - Find pertinant negatives ('PN') or pertinant positives ('PP') + Find pertinent negatives (PN) or pertinent positives (PP). shape - Shape of input data starting with batch size + Shape of input data starting with batch size. kappa - Confidence parameter for the attack loss term + Confidence parameter for the attack loss term. beta - Regularization constant for L1 loss term + Regularization constant for L1 loss term. feature_range - Tuple with min and max ranges to allow for perturbed instances. Min and max ranges can be floats or - numpy arrays with dimension (1x nb of features) for feature-wise ranges + Tuple with min and max ranges to allow for perturbed instances. Min and max ranges can be `float` or + `numpy` arrays with dimension (1x nb of features) for feature-wise ranges. gamma - Regularization constant for optional auto-encoder loss term + Regularization constant for optional auto-encoder loss term. ae_model - Optional auto-encoder model used for loss regularization + Optional auto-encoder model used for loss regularization. learning_rate_init - Initial learning rate of optimizer + Initial learning rate of optimizer. max_iterations - Maximum number of iterations for finding a PN or PP + Maximum number of iterations for finding a PN or PP. c_init - Initial value to scale the attack loss term + Initial value to scale the attack loss term. c_steps - Number of iterations to adjust the constant scaling the attack loss term + Number of iterations to adjust the constant scaling the attack loss term. eps - If numerical gradients are used to compute dL/dx = (dL/dp) * (dp/dx), then eps[0] is used to - calculate dL/dp and eps[1] is used for dp/dx. eps[0] and eps[1] can be a combination of float values and - numpy arrays. For eps[0], the array dimension should be (1x nb of prediction categories) and for - eps[1] it should be (1x nb of features) + If numerical gradients are used to compute `dL/dx = (dL/dp) * (dp/dx)`, then `eps[0]` is used to + calculate `dL/dp` and `eps[1]` is used for `dp/dx`. `eps[0]` and `eps[1]` can be a combination of `float` + values and `numpy` arrays. 
For `eps[0]`, the array dimension should be (1x nb of prediction categories) + and for `eps[1]` it should be (1x nb of features). clip - Tuple with min and max clip ranges for both the numerical gradients and the gradients - obtained from the TensorFlow graph + Tuple with `min` and `max` clip ranges for both the numerical gradients and the gradients + obtained from the `tensorflow` graph. update_num_grad - If numerical gradients are used, they will be updated every update_num_grad iterations + If numerical gradients are used, they will be updated every `update_num_grad` iterations. no_info_val - Global or feature-wise value considered as containing no information + Global or feature-wise value considered as containing no information. write_dir - Directory to write tensorboard files to + Directory to write `tensorboard` files to. sess - Optional Tensorflow session that will be used if passed instead of creating or inferring one internally + Optional `tensorflow` session that will be used if passed instead of creating or inferring one internally. """ super().__init__(meta=copy.deepcopy(DEFAULT_META_CEM)) # get params for storage in meta @@ -303,9 +303,9 @@ def fit(self, train_data: np.ndarray, no_info_type: str = 'median') -> "CEM": Parameters ---------- train_data - Representative sample from the training data + Representative sample from the training data. no_info_type - Median or mean value by feature supported + Median or mean value by feature supported. """ # TODO: find equal distance area in distribution to different classes as "no info" area if self.no_info_val is not None: @@ -332,9 +332,9 @@ def loss_fn(self, pred_proba: np.ndarray, Y: np.ndarray) -> np.ndarray: Parameters ---------- pred_proba - Prediction probabilities of an instance + Prediction probabilities of an instance. Y - One-hot representation of instance labels + One-hot representation of instance labels. Returns ------- @@ -363,11 +363,12 @@ def perturb(self, X: np.ndarray, eps: Union[float, np.ndarray], proba: bool = Fa Parameters ---------- X - Array to be perturbed + Array to be perturbed. eps - Size of perturbation + Size of perturbation. proba - If True, the net effect of the perturbation needs to be 0 to keep the sum of the probabilities equal to 1 + If ``True``, the net effect of the perturbation needs to be 0 to keep the sum of the + probabilities equal to 1. Returns ------- @@ -391,14 +392,14 @@ def perturb(self, X: np.ndarray, eps: Union[float, np.ndarray], proba: bool = Fa def get_gradients(self, X: np.ndarray, Y: np.ndarray) -> np.ndarray: """ Compute numerical gradients of the attack loss term: - dL/dx = (dL/dP)*(dP/dx) with L = loss_attack_s; P = predict; x = adv_s + `dL/dx = (dL/dP)*(dP/dx)` with `L = loss_attack_s; P = predict; x = adv_s` Parameters ---------- X - Instance around which gradient is evaluated + Instance around which gradient is evaluated. Y - One-hot representation of instance labels + One-hot representation of instance labels. Returns ------- @@ -446,18 +447,17 @@ def g(preds_pert): def attack(self, X: np.ndarray, Y: np.ndarray, verbose: bool = False) \ -> Tuple[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ - Find pertinent negative or pertinent positive for instance X using a fast iterative + Find pertinent negative or pertinent positive for instance `X` using a fast iterative shrinkage-thresholding algorithm (FISTA). Parameters ---------- X - Instance to attack + Instance to attack. Y - Labels for X + Labels for `X`. 
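The chain rule `dL/dx = (dL/dp) * (dp/dx)` used for the numerical gradients above can be sketched with central differences as follows. The toy predictor and loss here are assumptions for illustration only; this is not the implementation used by the explainer, and `eps_p`/`eps_x` play the role of `eps[0]`/`eps[1]`.

import numpy as np

def predict(x):                      # toy 2-class softmax over a linear score (assumption)
    z = np.stack([x.sum(axis=1), -x.sum(axis=1)], axis=1)
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def loss(p, y):                      # toy attack-style loss on the predicted probabilities
    return -(y * np.log(p + 1e-10)).sum()

x = np.array([[0.5, -1.0, 2.0]])     # instance, shape (1 x nb of features)
y = np.array([[1.0, 0.0]])           # one-hot label, shape (1 x nb of classes)
eps_p, eps_x = 1e-3, 1e-3            # perturbation sizes for dL/dp and dp/dx

p = predict(x)
# dL/dp via central differences over each prediction category.
dLdp = np.zeros_like(p)
for j in range(p.shape[1]):
    d = np.zeros_like(p); d[0, j] = eps_p
    dLdp[0, j] = (loss(p + d, y) - loss(p - d, y)) / (2 * eps_p)
# dp/dx via central differences over each input feature, then the chain rule.
dLdx = np.zeros_like(x)
for i in range(x.shape[1]):
    d = np.zeros_like(x); d[0, i] = eps_x
    dpdx_i = (predict(x + d) - predict(x - d)) / (2 * eps_x)   # shape (1 x nb of classes)
    dLdx[0, i] = (dLdp * dpdx_i).sum()
print(dLdx)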
verbose - Print intermediate results of optimization if True - + Print intermediate results of optimization if ``True``. Returns ------- @@ -479,9 +479,9 @@ def compare(x: Union[float, int, np.ndarray], y: int) -> bool: Parameters ---------- x - Predicted class probabilities or labels + Predicted class probabilities or labels. y - Target or predicted labels + Target or predicted labels. Returns ------- @@ -661,16 +661,20 @@ def explain(self, X: np.ndarray, Y: Optional[np.ndarray] = None, verbose: bool = Parameters ---------- X - Instances to attack + Instances to attack. Y - Labels for X + Labels for `X`. verbose - Print intermediate results of optimization if True + Print intermediate results of optimization if ``True``. Returns ------- explanation `Explanation` object containing the PP or PN with additional metadata as attributes. + See usage at `CEM examples`_ for details. + + .. _CEM examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/CEM.html """ if X.shape[0] != 1: logger.warning('Currently only single instance explanations supported (first dim = 1), ' @@ -715,4 +719,12 @@ def explain(self, X: np.ndarray, Y: Optional[np.ndarray] = None, verbose: bool = return explanation def reset_predictor(self, predictor: Union[Callable, tf.keras.Model]) -> None: + """ + Resets the predictor function/model. + + Parameters + ---------- + predictor + New predictor function/model. + """ raise NotImplementedError('Resetting a predictor is currently not supported') diff --git a/alibi/explainers/cfproto.py b/alibi/explainers/cfproto.py index 0a547c0c8..2f5ec2ad7 100644 --- a/alibi/explainers/cfproto.py +++ b/alibi/explainers/cfproto.py @@ -61,54 +61,53 @@ def __init__(self, Parameters ---------- predict - Keras or TensorFlow model or any other model's prediction function returning class probabilities + `tensorflow` model or any other model's prediction function returning class probabilities. shape - Shape of input data starting with batch size + Shape of input data starting with batch size. kappa - Confidence parameter for the attack loss term + Confidence parameter for the attack loss term. beta - Regularization constant for L1 loss term + Regularization constant for L1 loss term. feature_range - Tuple with min and max ranges to allow for perturbed instances. Min and max ranges can be floats or - numpy arrays with dimension (1x nb of features) for feature-wise ranges + Tuple with `min` and `max` ranges to allow for perturbed instances. `Min` and `max` ranges can be `float` + or `numpy` arrays with dimension (1x nb of features) for feature-wise ranges. gamma - Regularization constant for optional auto-encoder loss term + Regularization constant for optional auto-encoder loss term. ae_model - Optional auto-encoder model used for loss regularization + Optional auto-encoder model used for loss regularization. enc_model - Optional encoder model used to guide instance perturbations towards a class prototype + Optional encoder model used to guide instance perturbations towards a class prototype. theta - Constant for the prototype search loss term + Constant for the prototype search loss term. cat_vars - Dict with as keys the categorical columns and as values - the number of categories per categorical variable. + Dict with as keys the categorical columns and as values the number of categories per categorical variable. ohe Whether the categorical variables are one-hot encoded (OHE) or not. If not OHE, they are assumed to have ordinal encodings. 
use_kdtree - Whether to use k-d trees for the prototype loss term if no encoder is available + Whether to use k-d trees for the prototype loss term if no encoder is available. learning_rate_init - Initial learning rate of optimizer + Initial learning rate of optimizer. max_iterations - Maximum number of iterations for finding a counterfactual + Maximum number of iterations for finding a counterfactual. c_init - Initial value to scale the attack loss term + Initial value to scale the attack loss term. c_steps - Number of iterations to adjust the constant scaling the attack loss term + Number of iterations to adjust the constant scaling the attack loss term. eps - If numerical gradients are used to compute dL/dx = (dL/dp) * (dp/dx), then eps[0] is used to - calculate dL/dp and eps[1] is used for dp/dx. eps[0] and eps[1] can be a combination of float values and - numpy arrays. For eps[0], the array dimension should be (1x nb of prediction categories) and for - eps[1] it should be (1x nb of features) + If numerical gradients are used to compute `dL/dx = (dL/dp) * (dp/dx)`, then `eps[0]` is used to + calculate `dL/dp` and `eps[1]` is used for `dp/dx`. `eps[0]` and `eps[1]` can be a combination of `float` + values and `numpy` arrays. For `eps[0]`, the array dimension should be (1x nb of prediction categories) + and for `eps[1]` it should be (1x nb of features). clip Tuple with min and max clip ranges for both the numerical gradients and the gradients - obtained from the TensorFlow graph + obtained from the `tensorflow` graph. update_num_grad - If numerical gradients are used, they will be updated every update_num_grad iterations + If numerical gradients are used, they will be updated every `update_num_grad` iterations. write_dir - Directory to write tensorboard files to + Directory to write `tensorboard` files to. sess - Optional Tensorflow session that will be used if passed instead of creating or inferring one internally + Optional `tensorflow` session that will be used if passed instead of creating or inferring one internally. """ super().__init__(meta=copy.deepcopy(DEFAULT_META_CFP)) params = locals() @@ -443,7 +442,7 @@ def apply_map(adv_to_map, to_num): adv_to_map Instance to map. to_num - Map from categorical to numerical values if True, vice versa if False. + Map from categorical to numerical values if ``True``, vice versa if ``False``. Returns ------- @@ -679,21 +678,21 @@ def fit(self, trustscore_kwargs Optional arguments to initialize the trust scores method. d_type - Pairwise distance metric used for categorical variables. Currently, 'abdm', 'mvdm' and 'abdm-mvdm' - are supported. 'abdm' infers context from the other variables while 'mvdm' uses the model predictions. - 'abdm-mvdm' is a weighted combination of the two metrics. + Pairwise distance metric used for categorical variables. Currently, ``'abdm'``, ``'mvdm'`` and + ``'abdm-mvdm'`` are supported. ``'abdm'`` infers context from the other variables while ``'mvdm'`` uses + the model predictions. ``'abdm-mvdm'`` is a weighted combination of the two metrics. w - Weight on 'abdm' (between 0. and 1.) distance if d_type equals 'abdm-mvdm'. + Weight on ``'abdm'`` (between 0. and 1.) distance if `d_type` equals ``'abdm-mvdm'``. disc_perc - List with percentiles used in binning of numerical features used for the 'abdm' - and 'abdm-mvdm' pairwise distance measures. + List with percentiles used in binning of numerical features used for the ``'abdm'`` + and ``'abdm-mvdm'`` pairwise distance measures. 
standardize_cat_vars - Standardize numerical values of categorical variables if True. + Standardize numerical values of categorical variables if ``True``. smooth - Smoothing exponent between 0 and 1 for the distances. Lower values of l will smooth the difference in + Smoothing exponent between 0 and 1 for the distances. Lower values will smooth the difference in distance metric between different features. center - Whether to center the scaled distance measures. If False, the min distance for each feature + Whether to center the scaled distance measures. If ``False``, the min distance for each feature except for the feature with the highest raw max distance will be the lower bound of the feature range, but the upper bound will be below the max feature range. update_feature_range @@ -822,9 +821,9 @@ def loss_fn(self, pred_proba: np.ndarray, Y: np.ndarray) -> np.ndarray: Parameters ---------- pred_proba - Prediction probabilities of an instance + Prediction probabilities of an instance. Y - One-hot representation of instance labels + One-hot representation of instance labels. Returns ------- @@ -844,14 +843,14 @@ def get_gradients(self, X: np.ndarray, Y: np.ndarray, grads_shape: tuple, cat_vars_ord: dict) -> np.ndarray: """ Compute numerical gradients of the attack loss term: - dL/dx = (dL/dP)*(dP/dx) with L = loss_attack_s; P = predict; x = adv_s + `dL/dx = (dL/dP)*(dP/dx)` with `L = loss_attack_s; P = predict; x = adv_s`. Parameters ---------- X - Instance around which gradient is evaluated + Instance around which gradient is evaluated. Y - One-hot representation of instance labels + One-hot representation of instance labels. grads_shape Shape of gradients. cat_vars_ord @@ -917,17 +916,17 @@ def score(self, X: np.ndarray, adv_class: int, orig_class: int, eps: float = 1e- Parameters ---------- X - Instance to encode and calculate distance metrics for + Instance to encode and calculate distance metrics for. adv_class - Predicted class on the perturbed instance + Predicted class on the perturbed instance. orig_class - Predicted class on the original instance + Predicted class on the original instance. eps - Small number to avoid dividing by 0 + Small number to avoid dividing by 0. Returns ------- - Ratio between the distance to the prototype of the predicted class for the original instance and + Ratio between the distance to the prototype of the predicted class for the original instance and \ the prototype of the predicted class for the perturbed instance. """ if self.enc_model: @@ -951,23 +950,23 @@ def attack(self, X: np.ndarray, Y: np.ndarray, target_class: Optional[list] = No k_type: str = 'mean', threshold: float = 0., verbose: bool = False, print_every: int = 100, log_every: int = 100) -> Tuple[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ - Find a counterfactual (CF) for instance X using a fast iterative shrinkage-thresholding algorithm (FISTA). + Find a counterfactual (CF) for instance `X` using a fast iterative shrinkage-thresholding algorithm (FISTA). Parameters ---------- X - Instance to attack + Instance to attack. Y - Labels for X as one-hot-encoding + Labels for `X` as one-hot-encoding. target_class - List with target classes used to find closest prototype. If None, the nearest prototype + List with target classes used to find closest prototype. If ``None``, the nearest prototype except for the predict class on the instance is used. k Number of nearest instances used to define the prototype for a class. 
Defaults to using all instances belonging to the class if an encoder is used and to 1 for k-d trees. k_type - Use either the average encoding of the k nearest instances in a class (k_type='mean') or - the k-nearest encoding in the class (k_type='point') to define the prototype of that class. + Use either the average encoding of the k nearest instances in a class (``k_type='mean'``) or + the k-nearest encoding in the class (``k_type='point'``) to define the prototype of that class. Only relevant if an encoder is used to define the prototypes. threshold Threshold level for the ratio between the distance of the counterfactual to the prototype of the @@ -975,11 +974,11 @@ def attack(self, X: np.ndarray, Y: np.ndarray, target_class: Optional[list] = No for the counterfactual. If the trust score is below the threshold, the proposed counterfactual does not meet the requirements. verbose - Print intermediate results of optimization if True + Print intermediate results of optimization if ``True``. print_every - Print frequency if verbose is True + Print frequency if verbose is ``True``. log_every - Tensorboard log frequency if write directory is specified + `tensorboard` log frequency if write directory is specified. Returns ------- @@ -996,9 +995,9 @@ def compare(x: Union[float, int, np.ndarray], y: int) -> bool: Parameters ---------- x - Predicted class probabilities or labels + Predicted class probabilities or labels. y - Target or predicted labels + Target or predicted labels. Returns ------- @@ -1288,18 +1287,18 @@ def explain(self, Parameters ---------- X - Instances to attack + Instances to attack. Y - Labels for X as one-hot-encoding + Labels for `X` as one-hot-encoding. target_class - List with target classes used to find closest prototype. If None, the nearest prototype + List with target classes used to find closest prototype. If ``None``, the nearest prototype except for the predict class on the instance is used. k Number of nearest instances used to define the prototype for a class. Defaults to using all instances belonging to the class if an encoder is used and to 1 for k-d trees. k_type - Use either the average encoding of the k nearest instances in a class (k_type='mean') or - the k-nearest encoding in the class (k_type='point') to define the prototype of that class. + Use either the average encoding of the `k` nearest instances in a class (``k_type='mean'``) or + the k-nearest encoding in the class (``k_type='point'``) to define the prototype of that class. Only relevant if an encoder is used to define the prototypes. threshold Threshold level for the ratio between the distance of the counterfactual to the prototype of the @@ -1307,16 +1306,20 @@ def explain(self, for the counterfactual. If the trust score is below the threshold, the proposed counterfactual does not meet the requirements. verbose - Print intermediate results of optimization if True + Print intermediate results of optimization if ``True``. print_every - Print frequency if verbose is True + Print frequency if verbose is ``True``. log_every - Tensorboard log frequency if write directory is specified + `tensorboard` log frequency if write directory is specified Returns ------- explanation `Explanation` object containing the counterfactual with additional metadata as attributes. + See usage at `CFProto examples`_ for details. + + .. 
_CFProto examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/CFProto.html """ # get params for storage in meta params = locals() @@ -1379,4 +1382,12 @@ def explain(self, return explanation def reset_predictor(self, predictor: Union[Callable, tf.keras.Model]) -> None: + """ + Resets the predictor function/model. + + Parameters + ---------- + predictor + New predictor function/model. + """ raise NotImplementedError('Resetting a predictor is currently not supported') diff --git a/alibi/explainers/cfrl_base.py b/alibi/explainers/cfrl_base.py index 8b54195d8..ed007c31d 100644 --- a/alibi/explainers/cfrl_base.py +++ b/alibi/explainers/cfrl_base.py @@ -55,7 +55,7 @@ def __call__(self, shape: Tuple[int, ...]) -> np.ndarray: Parameters ---------- shape - Shape of the tensor to be generated + Shape of the array to be generated Returns ------- @@ -71,15 +71,15 @@ class ReplayBuffer: """ Circular experience replay buffer for `CounterfactualRL` (DDPG). When the buffer is filled, then the oldest experience is replaced by the new one (FIFO). The experience batch size is kept constant and inferred when - the first batch of data is stored. Allowing flexible batch size can generate Tensorflow warning due to + the first batch of data is stored. Allowing flexible batch size can generate `tensorflow` warning due to the `tf.function` retracing, which can lead to a drop in performance. """ - X: np.ndarray # buffer for the inputs - Y_m: np.ndarray # buffer for the model's prediction - Y_t: np.ndarray # buffer for the counterfactual targets - Z: np.ndarray # buffer for the input embedding - Z_cf_tilde: np.ndarray # buffer for the noised counterfactual embedding - R_tilde: np.ndarray # buffer for the noised counterfactual reward tensor + X: np.ndarray #: Inputs buffer. + Y_m: np.ndarray #: Model's prediction buffer. + Y_t: np.ndarray #: Counterfactual targets buffer. + Z: np.ndarray #: Input embedding buffer. + Z_cf_tilde: np.ndarray #: Noised counterfactual embedding buffer. + R_tilde: np.ndarray #: Noise counterfactual rewards buffer. def __init__(self, size: int = 1000) -> None: """ @@ -89,7 +89,7 @@ def __init__(self, size: int = 1000) -> None: ---------- size Dimension of the buffer in batch size. This that the total memory allocated is proportional with the - `size` * `batch_size`, where `batch_size` is inferred from the first tensors to be stored. + `size x batch_size`, where `batch_size` is inferred from the first array to be stored. """ self.idx = 0 # cursor for the buffer @@ -116,7 +116,7 @@ def append(self, X Input array. Y_m - Model's prediction class of x. + Model's prediction class of `X`. Y_t Counterfactual target class. Z @@ -127,6 +127,8 @@ def append(self, Conditional array. R_tilde Noised counterfactual reward array. + **kwargs + Other arguments. Not used. """ # Initialize the buffers. if not hasattr(self, 'X'): @@ -173,9 +175,9 @@ def sample(self) -> Dict[str, Optional[np.ndarray]]: Returns ------- - A batch experience. For a description of the keys and values returned, see parameter descriptions \ - in :py:meth:`alibi.explainers.cfrl_base.ReplayBuffer.append` method. The batch size returned is the same \ - as the one passed in the :py:meth:`alibi.explainers.cfrl_base.ReplayBuffer.append`. + A batch experience. For a description of the keys and values returned, see parameter descriptions \ + in :py:meth:`alibi.explainers.cfrl_base.ReplayBuffer.append` method. 
The batch size returned is the same \ + as the one passed in the :py:meth:`alibi.explainers.cfrl_base.ReplayBuffer.append`. """ # Generate random indices to be sampled. rand_idx = np.random.randint(low=0, high=self.len * self.batch_size, size=(self.batch_size,)) @@ -231,73 +233,75 @@ def sample(self) -> Dict[str, Optional[np.ndarray]]: """ Default Counterfactual with Reinforcement Learning parameters. - - ``'act_noise'``: float, standard deviation for the normal noise added to the actor for exploration. + - ``'act_noise'`` : ``float`` - standard deviation for the normal noise added to the actor for exploration. - - ``'act_low'``: float, minimum action value. Each action component takes values between `[act_low, act_high]`. + - ``'act_low'`` : ``float`` - minimum action value. Each action component takes values between \ + `[act_low, act_high]`. - - ``'act_high'``: float, maximum action value. Each action component takes values between `[act_low, act_high]`. + - ``'act_high'`` : ``float`` - maximum action value. Each action component takes values between \ + `[act_low, act_high]`. - - ``'replay_buffer_size'``: int, dimension of the replay buffer in `batch_size` units. The total memory \ - allocated is proportional with the `size` * `batch_size`. + - ``'replay_buffer_size'`` : ``int`` - dimension of the replay buffer in `batch_size` units. The total memory \ + allocated is proportional with the `size x batch_size`. - - ``'batch_size'``: int, training batch size. + - ``'batch_size'`` : ``int`` - training batch size. - - ``'num_workers'``: int, number of workers used by the data loader if `pytorch` backend is selected. + - ``'num_workers'`` : ``int`` - number of workers used by the data loader if ``'pytorch'`` backend is selected. - - ``'shuffle'``: bool, whether to shuffle the datasets every epoch. + - ``'shuffle'`` : ``bool`` - whether to shuffle the datasets every epoch. - - ``'exploration_steps'``: int, number of exploration steps. For the firts `exploration_steps`, the \ + - ``'exploration_steps'`` : ``int`` - number of exploration steps. For the first `exploration_steps`, the \ counterfactual embedding coordinates are sampled uniformly at random from the interval `[act_low, act_high]`. - - ``'update_every'``: int, number of steps that should elapse between gradient updates. Regardless of the \ + - ``'update_every'`` : ``int`` - number of steps that should elapse between gradient updates. Regardless of the \ waiting steps, the ratio of waiting steps to gradient steps is locked to 1. - - ``'update_after'``: int, number of steps to wait before start updating the actor and critic. This ensures that \ - the replay buffers is full enough for useful updates. + - ``'update_after'`` : ``int`` - number of steps to wait before start updating the actor and critic. This ensures \ + that the replay buffers is full enough for useful updates. - - ``'backend'``: str, backend to be used: `tensorflow` | `pytorch`. Default `tensorflow`. + - ``'backend'`` : ``str`` - backend to be used: ``'tensorflow'`` | ``'pytorch'``. Default ``'tensorflow'``. - - ``'train_steps'``: int, number of train steps. + - ``'train_steps'`` : ``int`` - number of train steps. - - ``'encoder_preprocessor'``: Callable, encoder/autoencoder data preprocessors. Transforms the input data into the \ - format expected by the autoencoder. By default, the identity function. + - ``'encoder_preprocessor'`` : ``Callable`` - encoder/auto-encoder data preprocessors. Transforms the input data \ + into the format expected by the auto-encoder. 
By default, the identity function. - - ``'decoder_inv_preprocessor'``: Callable, decoder/autoencoder data inverse preprocessor. Transforms data from \ - the autoencoder output format to the original input format. Before calling the prediction function, the data is \ - inverse preprocessed to match the original input format. By default, the identity function. + - ``'decoder_inv_preprocessor'`` : ``Callable`` - decoder/auto-encoder data inverse preprocessor. Transforms data \ + from the auto-encoder output format to the original input format. Before calling the prediction function, the \ + data is inverse preprocessed to match the original input format. By default, the identity function. - - ``'reward_func'``: Callable, element-wise reward function. By default, considers classification task and \ + - ``'reward_func'`` : ``Callable`` - element-wise reward function. By default, considers classification task and \ checks if the counterfactual prediction label matches the target label. Note that this is element-wise, so a \ tensor is expected to be returned. - - ``'postprocessing_funcs'``: List[Postprocessing], list of post-processing functions. The function are applied in \ - the order, from low to high index. Non-differentiable post-processing can be applied. The function expects as \ - arguments `X_cf` - the counterfactual instance, `X` - the original input instance and `C` - the conditional \ - vector, and returns the post-processed counterfactual instance `X_cf_pp` which is passed as `X_cf` for the \ - following functions. By default, no post-processing is applied (empty list). + - ``'postprocessing_funcs'`` : ``List[Postprocessing]`` - list of post-processing functions. The functions are \ + applied in order, from low to high index. Non-differentiable post-processing can be applied. Each function \ + expects as arguments `X_cf` - the counterfactual instance, `X` - the original input instance and `C` - the \ + conditional vector, and returns the post-processed counterfactual instance `X_cf_pp` which is passed as `X_cf` \ + to the following functions. By default, no post-processing is applied (empty list). - - ``'conditional_func'``: Callable, generates a conditional vector given a pre-processed input instance. By \ - default, the function returns `None` which is equivalent to no conditioning. + - ``'conditional_func'`` : ``Callable`` - generates a conditional vector given a pre-processed input instance. By \ default, the function returns ``None`` which is equivalent to no conditioning. - - ``'callbacks'``: List[Callback], list of callback functions applied at the end of each training step. + - ``'callbacks'`` : ``List[Callback]`` - list of callback functions applied at the end of each training step. - - ``'actor'``: Optional[Union[tensorflow.keras.Model, torch.nn.Module]], actor network. + - ``'actor'`` : ``Optional[Union[tensorflow.keras.Model, torch.nn.Module]]`` - actor network. - - ``'critic;``: Optional[Union[tensorflow.keras.Model, torch.nn.Module]], critic network. + - ``'critic'`` : ``Optional[Union[tensorflow.keras.Model, torch.nn.Module]]`` - critic network. - - ``'optimizer_actor'``: Optional[Union[tensorflow.keras.optimizers.Optimizer, torch.optim.Optimizer]], actor \ - optimizer. + - ``'optimizer_actor'`` : ``Optional[Union[tensorflow.keras.optimizers.Optimizer, torch.optim.Optimizer]]`` - \ actor optimizer. - - ``'optimizer_critic'``: Optional[Union[tensorflow.keras.optimizer.Optimizer, torch.optim.Optimizer]], critic \ optimizer.
+ - ``'optimizer_critic'`` : ``Optional[Union[tensorflow.keras.optimizer.Optimizer, torch.optim.Optimizer]]`` - \ + critic optimizer. - - ``'lr_actor'``: float, actor learning rate. + - ``'lr_actor'`` : ``float`` - actor learning rate. - - ``'lr_critic'``: float, critic learning rate. + - ``'lr_critic'`` : ``float`` - critic learning rate. - - ``'actor_hidden_dim'``: int, actor hidden layer dimension. + - ``'actor_hidden_dim'`` : ``int`` - actor hidden layer dimension. - - ``'critic_hidden_dim'``: int, critic hidden layer dimension. + - ``'critic_hidden_dim'`` : ``int`` - critic hidden layer dimension. """ _PARAM_TYPES = { @@ -315,9 +319,10 @@ def sample(self) -> Dict[str, Optional[np.ndarray]]: """ Parameter types for serialization - - ``'primitives'``: List[str], list of parameters having primitive data types. + - ``'primitives'`` : List[str] - list of parameters having primitive data types. - - ``'complex'``: List[str], list of parameters having complex data types (e.g., functions, models, optimizers etc.) + - ``'complex'`` : List[str] - list of parameters having complex data types (e.g., functions, models,\ + optimizers etc.). """ @@ -340,10 +345,10 @@ def __init__(self, Parameters ---------- predictor - A callable that takes a tensor of N data points as inputs and returns N outputs. For classification task, - the second dimension of the output should match the number of classes. Thus, the output can be either - a soft label distribution or a hard label distribution (i.e. one-hot encoding) without affecting the - performance since `argmax` is applied to the predictor's output. + A callable that takes a `numpy` array of `N` data points as inputs and returns `N` outputs. For + classification task, the second dimension of the output should match the number of classes. Thus, the + output can be either a soft label distribution or a hard label distribution (i.e. one-hot encoding) + without affecting the performance since `argmax` is applied to the predictor's output. encoder Pretrained encoder network. decoder @@ -353,12 +358,12 @@ def __init__(self, coeff_consistency Consistency loss coefficient. latent_dim - Autoencoder latent dimension. Can be omitted if the actor network is user specified. + Auto-encoder latent dimension. Can be omitted if the actor network is user specified. backend - Deep learning backend: `tensorflow` | `pytorch`. Default `tensorflow`. + Deep learning backend: ``'tensorflow'`` | ``'pytorch'``. Default ``'tensorflow'``. seed - Seed for reproducibility. The results are not reproducible for `tensorflow` backend. - kwargs + Seed for reproducibility. The results are not reproducible for ``'tensorflow'`` backend. + **kwargs Used to replace any default parameter from :py:data:`alibi.explainers.cfrl_base.DEFAULT_BASE_PARAMS`. """ super().__init__(meta=deepcopy(DEFAULT_META_CFRL)) @@ -405,7 +410,7 @@ def __init__(self, @staticmethod def _serialize_params(params: Dict[str, Any]) -> Dict[str, Any]: """ - Parameter serialization. The function replaces object by human-readable representation + Parameter serialization. The function replaces object by human-readable representation. Parameters ---------- @@ -440,7 +445,7 @@ def _serialize_params(params: Dict[str, Any]) -> Dict[str, Any]: def _get_name(a: Any) -> str: """ Constructs a name for the given object. If the object has as built-in name, the name is return. - If the object has a built-in class name, the name of the class is returned. Otherwise `unknown` is returned. 
+ If the object has a built-in class name, the name of the class is returned. Otherwise ``'unknown'`` is returned. Parameters ---------- @@ -460,7 +465,7 @@ def _get_name(a: Any) -> str: return "unknown" @staticmethod - def _verify_backend(backend): + def _verify_backend(backend: str): """ Verifies if the backend is supported. @@ -480,14 +485,16 @@ def _verify_backend(backend): elif backend not in [Framework.PYTORCH, Framework.TENSORFLOW]: raise NotImplementedError(f'{backend} not implemented. Use `tensorflow` or `pytorch` instead.') - def _select_backend(self, backend, **kwargs): + def _select_backend(self, backend: str, **kwargs): """ Selects the backend according to the `backend` flag. Parameters --------- backend - Deep learning backend: `tensorflow` | `pytorch`. Default `tensorflow`. + Deep learning backend: ``'tensorflow'`` | ``'pytorch'``. Default `tensorflow`. + **kwargs + Other arguments. Not used. """ return tensorflow_base_backend if backend == "tensorflow" else pytorch_base_backend @@ -507,19 +514,21 @@ def _validate_kwargs(self, Parameters ---------- predictor. - A callable that takes a tensor of N data points as inputs and returns N outputs. + A callable that takes a `numpy` array of `N` data points as inputs and returns `N` outputs. encoder Pretrained encoder network. decoder Pretrained decoder network. latent_dim - Autoencoder latent dimension. + Auto-encoder latent dimension. coeff_sparsity Sparsity loss coefficient. coeff_consistency Consistency loss coefficient. backend - Deep learning backend: `tensorflow` | `pytorch`. + Deep learning backend: ``'tensorflow'`` | ``'pytorch'``. + **kwargs + Other arguments. """ # Copy default parameters. params = deepcopy(DEFAULT_BASE_PARAMS) @@ -596,12 +605,12 @@ def load(cls, path: Union[str, os.PathLike], predictor: Any) -> "Explainer": def reset_predictor(self, predictor: Any) -> None: """ - Resets the predictor to be explained. + Resets the predictor. Parameters ---------- predictor - New predictor to be set. + New predictor. """ self.params["predictor"] = predictor self.meta["params"].update(CounterfactualRL._serialize_params(self.params)) @@ -778,7 +787,7 @@ def _is_classification(pred: np.ndarray) -> bool: Returns ------- - `True` if the prediction has shape of 2 and the second dimension bigger grater than 1. `False` otherwise. + ``True`` if the prediction has shape of 2 and the second dimension bigger grater than 1. ``False`` otherwise. """ return len(pred.shape) == 2 and pred.shape[1] > 1 @@ -797,15 +806,19 @@ def explain(self, # type: ignore[override] Y_t Counterfactual targets. C - Conditional vectors. If `None`, it means that no conditioning was used during training (i.e. the - `conditional_func` returns `None`). + Conditional vectors. If ``None``, it means that no conditioning was used during training (i.e. the + `conditional_func` returns ``None``). batch_size Batch size to be used when generating counterfactuals. Returns ------- - `Explanation` object containing the inputs with the corresponding labels, the counterfactuals with the \ - corresponding labels, targets and additional metadata. + explanation + `Explanation` object containing the counterfactual with additional metadata as attributes. \ + See usage at `CFRL examples`_ for details. + + .. _CFRL examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/CFRL.html """ # General validation. 
self._validate_target(Y_t) @@ -858,7 +871,7 @@ def _compute_counterfactual(self, Y_t: np.ndarray, C: Optional[np.ndarray] = None) -> Dict[str, Optional[np.ndarray]]: # TODO: TypedDict """ - Compute counterfactual instance for a given input, target and condition vector + Compute counterfactual instance for a given input, target and condition vector. Parameters ---------- @@ -867,14 +880,14 @@ def _compute_counterfactual(self, Y_t Counterfactual targets. C - Conditional vector. If `None`, it means that no conditioning was used during training (i.e. the - `conditional_func` returns `None`). + Conditional vector. If ``None``, it means that no conditioning was used during training (i.e. the + `conditional_func` returns ``None``). Returns ------- - Dictionary containing the input instances in the original format, input classification labels, - counterfactual instances in the original format, counterfactual classification labels, target labels, - conditional vectors. + Dictionary containing the input instances in the original format, input classification labels, + counterfactual instances in the original format, counterfactual classification labels, target labels, + conditional vectors. """ # Save original input for later usage. X_orig = X @@ -966,13 +979,13 @@ def _build_explanation(self, Y_t Target labels. C - Condition vector. If `None`, it means that no conditioning was used during training (i.e. the - `conditional_func` returns `None`). + Condition vector. If ``None``, it means that no conditioning was used during training (i.e. the + `conditional_func` returns ``None``). Returns ------- - `Explanation` object containing the inputs with the corresponding labels, the counterfactuals with the - corresponding labels, targets and additional metadata. + `Explanation` object containing the inputs with the corresponding labels, the counterfactuals with the \ + corresponding labels, targets and additional metadata. """ data = deepcopy(DEFAULT_DATA_CFRL) @@ -1000,20 +1013,20 @@ def __call__(self, X_cf: Any, X: np.ndarray, C: Optional[np.ndarray]) -> Any: ---------- X_cf Counterfactual instance. The datatype depends on the output of the decoder. For example, for an image - dataset, the output is `np.ndarray`. For a tabular dataset, the output is `List[np.ndarray]` where each + dataset, the output is ``np.ndarray``. For a tabular dataset, the output is ``List[np.ndarray]`` where each element of the list corresponds to a feature. This corresponds to the decoder's output from the heterogeneous autoencoder (see :py:class:`alibi.models.tensorflow.autoencoder.HeAE` and :py:class:`alibi.models.pytorch.autoencoder.HeAE`). X Input instance. C - Conditional vector. If `None`, it means that no conditioning was used during training (i.e. the - `conditional_func` returns `None`). + Conditional vector. If ``None``, it means that no conditioning was used during training (i.e. the + `conditional_func` returns ``None``). Returns ------- X_cf - Post-processed X_cf. + Post-processed `X_cf`. """ pass @@ -1040,47 +1053,47 @@ def __call__(self, bound to 1. model CounterfactualRL explainer. All the parameters defined in - :py:data:`alibi.explainers.cfrl_base.DEFAULT_BASE_PARAMS` can be accessed through 'model.params'. + :py:data:`alibi.explainers.cfrl_base.DEFAULT_BASE_PARAMS` can be accessed through `model.params`. sample Dictionary of samples used for an update which contains - - ``'X'``: input instances. + - ``'X'`` : ``np.ndarray`` - input instances. - - ``'Y_m'``: predictor outputs for the input instances. 
+ - ``'Y_m'`` : ``np.ndarray`` - predictor outputs for the input instances. - - ``'Y_t'``: target outputs. + - ``'Y_t'`` : ``np.ndarray`` - target outputs. - - ``'Z'``: input embeddings. + - ``'Z'`` : ``np.ndarray`` - input embeddings. - - ``'Z_cf_tilde'``: noised counterfactual embeddings. + - ``'Z_cf_tilde'`` : ``np.ndarray`` - noised counterfactual embeddings. - - ``'X_cf_tilde'``: noised counterfactual instances obtained ofter decoding the noised counterfactual \ - embeddings `Z_cf_tilde` and apply post-processing functions. + - ``'X_cf_tilde'`` : ``np.ndarray`` - noised counterfactual instances obtained ofter decoding the \ + noised counterfactual embeddings `Z_cf_tilde` and apply post-processing functions. - - ``'C'``: conditional vector. + - ``'C'`` : ``Optional[np.ndarray]`` - conditional vector. - - ``'R_tilde'``: reward obtained for the noised counterfactual instances. + - ``'R_tilde'`` : ``np.ndarray`` - reward obtained for the noised counterfactual instances. - - ``'Z_cf'``: counterfactual embeddings. + - ``'Z_cf'`` : ``np.ndarray`` - counterfactual embeddings. - - ``'X_cf'``: counterfactual instances obtained after decoding the countefactual embeddings `Z_cf` and \ - apply post-processing functions. + - ``'X_cf'`` : ``np.ndarray`` - counterfactual instances obtained after decoding the counterfactual \ + embeddings `Z_cf` and apply post-processing functions. losses Dictionary of losses which contains - - ``'loss_actor'``: actor network loss. + - ``'loss_actor'`` : ``Callable`` - actor network loss. - - ``'loss_critic'``: critic network loss. + - ``'loss_critic'`` : ``Callable`` - critic network loss. - - ``'sparsity_loss'``: sparsity loss for the \ + - ``'sparsity_loss'`` : ``Callable`` - sparsity loss for the \ :py:class:`alibi.explainers.cfrl_base.CounterfactualRL` class. - - ``'sparsity_num_loss'``: numerical features sparsity loss for the \ + - ``'sparsity_num_loss'`` : ``Callable`` - numerical features sparsity loss for the \ :py:class:`alibi.explainers.cfrl_tabular.CounterfactualRLTabular` class. - - ``'sparsity_cat_loss'``: categorical features sparsity loss for the \ + - ``'sparsity_cat_loss'`` : ``Callable`` - categorical features sparsity loss for the \ :py:class:`alibi.explainers.cfrl_tabular.CounterfactualRLTabular` class. - - ``'consistency_loss'``: consistency loss if used. + - ``'consistency_loss'`` : ``Callable`` - consistency loss if used. """ pass diff --git a/alibi/explainers/cfrl_tabular.py b/alibi/explainers/cfrl_tabular.py index 48200b0bf..2e13e86a7 100644 --- a/alibi/explainers/cfrl_tabular.py +++ b/alibi/explainers/cfrl_tabular.py @@ -27,7 +27,7 @@ class SampleTabularPostprocessing(Postprocessing): """ - Tabular sampling post-processing. Given the output of the heterogeneous autoencoder the post-processing + Tabular sampling post-processing. Given the output of the heterogeneous auto-encoder the post-processing functions samples the output according to the conditional vector. Note that the original input instance is required to perform the conditional sampling. """ @@ -44,7 +44,7 @@ def __init__(self, category_map: Dict[int, List[str]], stats: Dict[int, Dict[str stats Dictionary of statistic of the training data. Contains the minimum and maximum value of each numerical feature in the training set. Each key is an index of the column and each value is another dictionary - containing `min` and `max` keys. + containing ``'min'`` and ``'max'`` keys. 
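        For illustration, a `stats` dictionary of this shape could be assembled from the training set as
        follows (a minimal sketch; `X_train` and the numerical column indices are assumed):

            import numpy as np

            numerical_ids = [0, 2]  # assumed indices of the numerical columns
            stats = {i: {'min': np.min(X_train[:, i]), 'max': np.max(X_train[:, i])}
                     for i in numerical_ids}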
""" super().__init__() self.category_map = category_map @@ -52,7 +52,7 @@ def __init__(self, category_map: Dict[int, List[str]], stats: Dict[int, Dict[str def __call__(self, X_cf: List[np.ndarray], X: np.ndarray, C: Optional[np.ndarray]) -> List[np.ndarray]: """ - Performs counterfactual conditional sampling acording to the conditional vector and the original input. + Performs counterfactual conditional sampling according to the conditional vector and the original input. Parameters ---------- @@ -66,7 +66,7 @@ def __call__(self, X_cf: List[np.ndarray], X: np.ndarray, C: Optional[np.ndarray Returns ------- - Conditional sampled counterfactual instance. + Conditional sampled counterfactual instance. """ return sample(X_hat_split=X_cf, X_ohe=X, @@ -93,7 +93,7 @@ def __call__(self, X_cf: List[np.ndarray], X: np.ndarray, C: Optional[np.ndarray Returns ------- - Concatenation of the counterfactual feature columns. + Concatenation of the counterfactual feature columns. """ return np.concatenate(X_cf, axis=1) @@ -128,21 +128,21 @@ def __init__(self, Parameters ---------- - predictor. - A callable that takes a tensor of N data points as inputs and returns N outputs. For classification task, - the second dimension of the output should match the number of classes. Thus, the output can be either - a soft label distribution or a hard label distribution (i.e. one-hot encoding) without affecting the - performance since `argmax` is applied to the predictor's output. + predictor + A callable that takes a `numpy` array of `N` data points as inputs and returns `N` outputs. For + classification task, the second dimension of the output should match the number of classes. Thus, the + output can be either a soft label distribution or a hard label distribution (i.e. one-hot encoding) + without affecting the performance since `argmax` is applied to the predictor's output. encoder Pretrained heterogeneous encoder network. decoder Pretrained heterogeneous decoder network. The output of the decoder must be a list of tensors. encoder_preprocessor - Autoencoder data pre-processor. Depending on the input format, the pre-processor can normalize + Auto-encoder data pre-processor. Depending on the input format, the pre-processor can normalize numerical attributes, transform label encoding to one-hot encoding etc. decoder_inv_preprocessor - Autoencoder data inverse pre-processor. This is the invers function of the pre-processor. It can - denormalize numerical attributes, transfrom one-hot encoding to label encoding, feature type casting etc. + Auto-encoder data inverse pre-processor. This is the inverse function of the pre-processor. It can + denormalize numerical attributes, transform one-hot encoding to label encoding, feature type casting etc. coeff_sparsity Sparsity loss coefficient. coeff_consistency @@ -155,26 +155,27 @@ def __init__(self, immutable_features List of immutable features. ranges - Numerical feature ranges. Note that exist numerical features such as `Age`, which are allowed to increase - only. We denote those by `inc_feat`. Similarly, there exist features allowed to decrease only. We denote - them by `dec_feat`. Finally, there are some free feature, which we denote by `free_feat`. With the previous - notation, we can define `range = {'inc_feat': [0, 1], 'dec_feat': [-1, 0], 'free_feat': [-1, 1]}`. - `free_feat` can be omitted, as any unspecified feature is considered free. 
Having the ranges of a feature - `{'feat': [a_low, a_high}`, when sampling is performed the numerical value will be clipped between - `[a_low * (max_val - min_val), a_high * [max_val - min_val]]`, where `a_low` and `a_high` are the minimum - and maximum values the feature `feat`. This implies that `a_low` and `a_high` are not restricted to {-1, 0} - and {0, 1}, but can be any float number in-between `[-1, 0]` and `[0, 1]`. + Numerical feature ranges. Note that exist numerical features such as ``'Age'``, which are allowed to + increase only. We denote those by ``'inc_feat'``. Similarly, there exist features allowed to decrease only. + We denote them by ``'dec_feat'``. Finally, there are some free feature, which we denote by ``'free_feat'``. + With the previous notation, we can define ``range = {'inc_feat': [0, 1], 'dec_feat': [-1, 0], + 'free_feat': [-1, 1]}``. ``'free_feat'`` can be omitted, as any unspecified feature is considered free. + Having the ranges of a feature `{'feat': [a_low, a_high}`, when sampling is performed the numerical value + will be clipped between `[a_low * (max_val - min_val), a_high * [max_val - min_val]]`, where `a_low` and + `a_high` are the minimum and maximum values the feature ``'feat'``. This implies that `a_low` and `a_high` + are not restricted to ``{-1, 0}`` and ``{0, 1}``, but can be any float number in-between `[-1, 0]` and + `[0, 1]`. weight_num Numerical loss weight. weight_cat Categorical loss weight. latent_dim - Autoencoder latent dimension. Can be omitted if the actor network is user specified. + Auto-encoder latent dimension. Can be omitted if the actor network is user specified. backend - Deep learning backend: `tensorflow` | `pytorch`. Default `tensorflow`. + Deep learning backend: ``'tensorflow'`` | ``'pytorch'``. Default ``'tensorflow'``. seed - Seed for reproducibility. The results are not reproducible for `tensorflow` backend. - kwargs + Seed for reproducibility. The results are not reproducible for ``'tensorflow'`` backend. + **kwargs Used to replace any default parameter from :py:data:`alibi.explainers.cfrl_base.DEFAULT_BASE_PARAMS`. """ super().__init__(encoder=encoder, decoder=decoder, latent_dim=latent_dim, predictor=predictor, @@ -231,7 +232,7 @@ def _select_backend(self, backend, **kwargs): Parameters ---------- backend - Deep learning backend. `tensorflow` | `pytorch`. Default `tensorflow`. + Deep learning backend. ``'tensorflow'`` | ``'pytorch'``. Default ``'tensorflow'``. """ return tensorflow_tabular_backend if backend == "tensorflow" else pytorch_tabular_backend @@ -285,7 +286,6 @@ def explain(self, # type: ignore[override] num_samples: int = 1, patience: int = 1000, tolerance: float = 1e-3) -> Explanation: - """ Computes counterfactuals for the given instances conditioned on the target and the conditional vector. @@ -296,14 +296,14 @@ def explain(self, # type: ignore[override] Y_t Target labels. C - List of conditional dictionaries. If `None`, it means that no conditioning was used during training - (i.e. the `conditional_func` returns `None`). If conditioning was used during training but no conditioning - is desired for the current input, an empty list is expected. + List of conditional dictionaries. If ``None``, it means that no conditioning was used during training + (i.e. the `conditional_func` returns ``None``). If conditioning was used during training but no + conditioning is desired for the current input, an empty list is expected. diversity Whether to generate diverse counterfactual set for the given instance. 
Only supported for a single input instance. num_samples - Number of diversity samples to be generated. Considered only if `diversity=True`. + Number of diversity samples to be generated. Considered only if ``diversity=True``. batch_size Batch size to use when generating counterfactuals. patience @@ -311,6 +311,15 @@ def explain(self, # type: ignore[override] the desired number of samples has been found. tolerance Tolerance to distinguish two counterfactual instances. + + Returns + ------- + explanation + `Explanation` object containing the counterfactual with additional metadata as attributes. \ + See usage `CFRL examples`_ for details. + + .. _CFRL examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/CFRL.html """ # General validation. self._validate_input(X) @@ -353,7 +362,7 @@ def explain(self, # type: ignore[override] # Check the number of conditions. if len(C) != 1 and len(C) != X.shape[0]: - raise ValueError("The number of conditions should be 1 or equals the number of samples in x.") + raise ValueError("The number of conditions should be 1 or equals the number of samples in X.") # If only one condition is passed. if len(C) == 1: @@ -395,14 +404,14 @@ def _diversity(self, Y_t Target label. C - List of conditional dictionaries. If `None`, it means that no conditioning was used during training - (i.e. the `conditional_func` returns `None`). + List of conditional dictionaries. If ``None``, it means that no conditioning was used during training + (i.e. the `conditional_func` returns ``None``). num_samples Number of counterfactual samples to be generated. batch_size Batch size used at inference. num_samples - Number of diversity samples to be generated. Considered only if `diversity=True`. + Number of diversity samples to be generated. Considered only if ``diversity=True``. batch_size Batch size to use when generating counterfactuals. patience @@ -413,7 +422,7 @@ def _diversity(self, Returns ------- - Explanation object containing the diverse counterfactuals. + Explanation object containing the diverse counterfactuals. """ # Check if condition. If no conditioning was used during training, the method can not generate a diverse # set of counterfactual instances diff --git a/alibi/explainers/counterfactual.py b/alibi/explainers/counterfactual.py index 96acbd08b..12e6e203c 100644 --- a/alibi/explainers/counterfactual.py +++ b/alibi/explainers/counterfactual.py @@ -22,15 +22,15 @@ def _define_func(predict_fn: Callable, Parameters ---------- predict_fn - Classifier prediction function + Classifier prediction function. pred_class - Predicted class of the instance to be explained + Predicted class of the instance to be explained. target_class - Target class of the explanation, one of 'same', 'other' or an integer class + Target class of the explanation, one of ``'same'``, ``'other'`` or an integer class. Returns ------- - Class-specific prediction function and the target class used. + Class-specific prediction function and the target class used. """ if target_class == 'other': @@ -100,44 +100,44 @@ def __init__(self, Parameters ---------- predict_fn - Keras or TensorFlow model or any other model's prediction function returning class probabilities + `tensorflow` model or any other model's prediction function returning class probabilities. shape - Shape of input data starting with batch size + Shape of input data starting with batch size. distance_fn - Distance function to use in the loss term + Distance function to use in the loss term. 
target_proba - Target probability for the counterfactual to reach + Target probability for the counterfactual to reach. target_class - Target class for the counterfactual to reach, one of 'other', 'same' or an integer denoting - desired class membership for the counterfactual instance + Target class for the counterfactual to reach, one of ``'other'``, ``'same'`` or an integer denoting + desired class membership for the counterfactual instance. max_iter - Maximum number of interations to run the gradient descent for (inner loop) + Maximum number of iterations to run the gradient descent for (inner loop). early_stop - Number of steps after which to terminate gradient descent if all or none of found instances are solutions + Number of steps after which to terminate gradient descent if all or none of found instances are solutions. lam_init - Initial regularization constant for the prediction part of the Wachter loss + Initial regularization constant for the prediction part of the Wachter loss. max_lam_steps - Maximum number of times to adjust the regularization constant (outer loop) before terminating the search + Maximum number of times to adjust the regularization constant (outer loop) before terminating the search. tol - Tolerance for the counterfactual target probability + Tolerance for the counterfactual target probability. learning_rate_init - Initial learning rate for each outer loop of lambda + Initial learning rate for each outer loop of `lambda`. feature_range - Tuple with min and max ranges to allow for perturbed instances. Min and max ranges can be floats or - numpy arrays with dimension (1 x nb of features) for feature-wise ranges + Tuple with `min` and `max` ranges to allow for perturbed instances. `Min` and `max` ranges can be `float` + or `numpy` arrays with dimension (1 x nb of features) for feature-wise ranges. eps Gradient step sizes used in calculating numerical gradients, defaults to a single value for all - features, but can be passed an array for feature-wise step sizes + features, but can be passed an array for feature-wise step sizes. init - Initialization method for the search of counterfactuals, currently must be 'identity' + Initialization method for the search of counterfactuals, currently must be ``'identity'``. decay - Flag to decay learning rate to zero for each outer loop over lambda + Flag to decay learning rate to zero for each outer loop over lambda. write_dir - Directory to write Tensorboard files to + Directory to write `tensorboard` files to. debug - Flag to write Tensorboard summaries for debugging + Flag to write `tensorboard` summaries for debugging. sess - Optional Tensorflow session that will be used if passed instead of creating or inferring one internally + Optional `tensorflow` session that will be used if passed instead of creating or inferring one internally. """ super().__init__(meta=copy.deepcopy(DEFAULT_META_CF)) # get params for storage in meta @@ -305,6 +305,17 @@ def fit(self, """ Fit method - currently unused as the counterfactual search is fully unsupervised. + Parameters + ---------- + X + Not used. Included for consistency. + y + Not used. Included for consistency. + + Returns + ------- + self + Explainer itself. """ # TODO feature ranges, epsilons and MADs @@ -318,12 +329,16 @@ def explain(self, X: np.ndarray) -> Explanation: Parameters ---------- X - Instance to be explained + Instance to be explained. Returns ------- - `Explanation` object containing the counterfactual with additional metadata as attributes. 
+ explanation + `Explanation` object containing the counterfactual with additional metadata as attributes. + See usage at `Counterfactual examples`_ for details. + .. _Counterfactual examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/CF.html """ # TODO change init parameters on the fly @@ -614,4 +629,12 @@ def _minimize_loss(self, self.return_dict['success'] = True def reset_predictor(self, predictor: Union[Callable, tf.keras.Model]) -> None: + """ + Resets the predictor function/model. + + Parameters + ---------- + predictor + New predictor function/model. + """ raise NotImplementedError('Resetting a predictor is currently not supported') diff --git a/alibi/explainers/integrated_gradients.py b/alibi/explainers/integrated_gradients.py index b0d768658..b84e1ad10 100644 --- a/alibi/explainers/integrated_gradients.py +++ b/alibi/explainers/integrated_gradients.py @@ -31,7 +31,7 @@ def _compute_convergence_delta(model: Union[tf.keras.models.Model], Parameters ---------- model - Tensorflow or keras model. + `tensorflow` model. input_dtypes List with data types of the inputs. attributions @@ -45,11 +45,11 @@ def _compute_convergence_delta(model: Union[tf.keras.models.Model], target Target for which the gradients are calculated for classification models. _is_list - Whether the model's input is a list (multiple inputs) or a np array (single input). + Whether the model's input is a `list` (multiple inputs) or a `np.narray` (single input). Returns ------- - Convergence deltas for each data point. + Convergence deltas for each data point. """ if forward_kwargs is None: forward_kwargs = {} @@ -98,7 +98,7 @@ def _sum_rows(inp): def _select_target(preds: tf.Tensor, targets: Union[None, tf.Tensor, np.ndarray, list]) -> tf.Tensor: """ - Select the predictions corresponding to the targets if targets is not None. + Select the predictions corresponding to the targets if targets is not ``None``. Parameters ---------- @@ -106,9 +106,10 @@ def _select_target(preds: tf.Tensor, Predictions before selection. targets Targets to select. + Returns ------- - Selected predictions + Selected predictions. """ if targets is not None: @@ -126,21 +127,23 @@ def _run_forward(model: Union[tf.keras.models.Model], target: Union[None, tf.Tensor, np.ndarray, list], forward_kwargs: Optional[dict] = None) -> tf.Tensor: """ - Returns the output of the model. If the target is not `None`, only the output for the selected target is returned. + Returns the output of the model. If the target is not ``None``, only the output for the selected + target is returned. Parameters ---------- model - Tensorflow or keras model. + `tensorflow` model. x Input data point. target Target for which the gradients are calculated for classification models. forward_kwargs Input keyword args. + Returns ------- - Model output or model output after target selection for classification models. + Model output or model output after target selection for classification models. """ if forward_kwargs is None: @@ -169,30 +172,30 @@ def _run_forward_from_layer(model: tf.keras.models.Model, Parameters ---------- model - Tensorflow or keras model. + `tensorflow` model. layer Starting layer for the forward call. orig_call Original `call` method of the layer. orig_dummy_input Dummy input needed to initiate the model forward call. The number of instances in the dummy input must - be the same as the number of instances in x. The dummy input values play no role in the evaluation + be the same as the number of instances in `x`. 
The dummy input values play no role in the evaluation as the layer's status is overwritten during the forward call. x Layer's inputs. The layer's status is overwritten with `x` during the forward call. target Target for the output position to be returned. forward_kwargs - Input keyword args. It must be a dict with numpy arrays as values. If it's not None, - the first dimension of the arrays must correspond to the number of instances in x and orig_dummy_input. + Input keyword args. It must be a dict with `numpy` arrays as values. If it's not ``None``, + the first dimension of the arrays must correspond to the number of instances in `x` and orig_dummy_input. run_from_layer_inputs - If True, the forward pass starts from the layer's inputs, if False it starts from the layer's outputs. + If ``True``, the forward pass starts from the layer's inputs, if ``False`` it starts from the layer's outputs. select_target Whether to return predictions for selected targets or return predictions for all targets. Returns ------- - Model's predictions for the given target. + Model's predictions for the given target. """ @@ -245,7 +248,7 @@ def _run_forward_to_layer(model: tf.keras.models.Model, Parameters ---------- model - Tensorflow or keras model. + `tensorflow` model. layer Starting layer for the forward call. orig_call @@ -255,11 +258,11 @@ def _run_forward_to_layer(model: tf.keras.models.Model, forward_kwargs Input keyword args. run_to_layer_inputs - If True, the layer's inputs are returned. If False, the layer's output's are returned. + If ``True``, the layer's inputs are returned. If ``False``, the layer's output's are returned. Returns ------- - Output of the given layer. + Output of the given layer. """ if forward_kwargs is None: @@ -316,7 +319,7 @@ def _forward_input_baseline(X: Union[List[np.ndarray], np.ndarray], bls Baselines. model - Tensorflow or keras model. + `tensorflow` model. layer Desired layer output. orig_call @@ -324,11 +327,12 @@ def _forward_input_baseline(X: Union[List[np.ndarray], np.ndarray], forward_kwargs Input keyword args. forward_to_inputs - If True, X and bls are forwarded to the layer's input. If False, they are forwarded to the layer's outputs. + If ``True``, `X` and bls are forwarded to the layer's input. If ``False``, they are forwarded to + the layer's outputs. Returns ------- - Forwarded inputs and baselines as a numpy arrays. + Forwarded inputs and baselines as a `numpy` arrays. """ if forward_kwargs is None: @@ -370,16 +374,17 @@ def _gradients_input(model: Union[tf.keras.models.Model], Parameters ---------- model - Tensorflow or keras model. + `tensorflow` model. x Input data point. target Target for which the gradients are calculated if the output dimension is higher than 1. forward_kwargs Input keyword args. + Returns ------- - Gradients for each input feature. + Gradients for each input feature. """ if forward_kwargs is None: @@ -408,12 +413,12 @@ def _gradients_layer(model: Union[tf.keras.models.Model], Parameters ---------- model - Tensorflow or keras model. + `tensorflow` model. layer Layer of the model with respect to which the gradients are calculated. orig_call Original `call` method of the layer. This is necessary since the call method is modified by the function - in order to make the layer output visible to the GradientTape. + in order to make the layer output visible to the `GradientTape`. x Input data point. target @@ -421,12 +426,12 @@ def _gradients_layer(model: Union[tf.keras.models.Model], forward_kwargs Input keyword args. 
compute_layer_inputs_gradients - If True, gradients are computed with respect to the layer's inputs. - If False, they are computed with respect to the layer's outputs. + If ``True``, gradients are computed with respect to the layer's inputs. + If ``False``, they are computed with respect to the layer's outputs. Returns ------- - Gradients for each element of layer. + Gradients for each element of layer. """ @@ -498,7 +503,7 @@ def wrapper(*args, **kwargs): def _format_baseline(X: np.ndarray, baselines: Union[None, int, float, np.ndarray]) -> np.ndarray: """ - Formats baselines to return a numpy array. + Formats baselines to return a `numpy` array. Parameters ---------- @@ -509,7 +514,7 @@ def _format_baseline(X: np.ndarray, Returns ------- - Formatted inputs and baselines as a numpy arrays. + Formatted inputs and baselines as a `numpy` arrays. """ if baselines is None: @@ -538,7 +543,7 @@ def _format_target(target: Union[None, int, list, np.ndarray], Returns ------- - Formatted target as a list. + Formatted target as a list. """ if target is not None: @@ -565,7 +570,7 @@ def _get_target_from_target_fn(target_fn: Callable, target_fn Target function. model - Model + Model. X Data to be explained. forward_kwargs @@ -573,7 +578,7 @@ def _get_target_from_target_fn(target_fn: Callable, Returns ------- - Integer array of dimension (N, ). + Integer array of dimension `(N, )`. """ if forward_kwargs is None: preds = model(X) @@ -610,7 +615,7 @@ def _sum_integral_terms(step_sizes: list, Returns ------- - Sums of the gradients along the chosen path. + Sums of the gradients along the chosen path. """ input_str = string.ascii_lowercase[1: len(grads.shape)] @@ -642,7 +647,7 @@ def _calculate_sum_int(batches: List[List[tf.Tensor]], batches List of batch gradients. model - tf.keras or keras model. + `tf.keras` or `keras` model. target List of targets. target_paths @@ -658,7 +663,7 @@ def _calculate_sum_int(batches: List[List[tf.Tensor]], Returns ------- - + Sums of the gradients along the chosen path. """ grads = tf.concat(batches[j], 0) shape = grads.shape[1:] @@ -685,13 +690,9 @@ def _validate_output(model: tf.keras.Model, Parameters ---------- model - Keras model for which the output is validated. + `Keras` model for which the output is validated. target Targets for which gradients are calculated - - Returns - ------- - """ if not model.output_shape or not any(isinstance(model.output_shape, t) for t in _valid_output_shape_type): raise NotImplementedError(f"The model output_shape attribute must be in {_valid_output_shape_type}. " @@ -718,21 +719,20 @@ def __init__(self, internal_batch_size: int = 100 ) -> None: """ - An implementation of the integrated gradients method for Tensorflow and Keras models. + An implementation of the integrated gradients method for `tensorflow` models. - For details of the method see the original paper: - https://arxiv.org/abs/1703.01365 . + For details of the method see the original paper: https://arxiv.org/abs/1703.01365 . Parameters ---------- model - Tensorflow or Keras model. + `tensorflow` model. layer Layer with respect to which the gradients are calculated. If not provided, the gradients are calculated with respect to the input. method Method for the integral approximation. Methods available: - "riemann_left", "riemann_right", "riemann_middle", "riemann_trapezoid", "gausslegendre". + ``"riemann_left"``, ``"riemann_right"``, ``"riemann_middle"``, ``"riemann_trapezoid"``, ``"gausslegendre"``. 
n_steps Number of step in the path integral approximation from the baseline to the input instance. internal_batch_size @@ -788,13 +788,13 @@ def explain(self, X Instance for which integrated gradients attribution are computed. forward_kwargs - Input keyword args. If it's not None, it must be a dict with numpy arrays as values. + Input keyword args. If it's not ``None``, it must be a dict with `numpy` arrays as values. The first dimension of the arrays must correspond to the number of examples. - It will be repeated for each of n_steps along the integrated path. + It will be repeated for each of `n_steps` along the integrated path. The attributions are not computed with respect to these arguments. baselines Baselines (starting point of the path integral) for each instance. - If the passed value is an `np.ndarray` must have the same shape as X. + If the passed value is an `np.ndarray` must have the same shape as `X`. If not provided, all features values for the baselines are set to 0. target Defines which element of the model output is considered to compute the gradients. @@ -805,13 +805,16 @@ def explain(self, For classification models `target` can be either the true classes or the classes predicted by the model. attribute_to_layer_inputs In case of layers gradients, controls whether the gradients are computed for the layer's inputs or - outputs. If True, gradients are computed for the layer's inputs, if False for the layer's outputs. + outputs. If ``True``, gradients are computed for the layer's inputs, if ``False`` for the layer's outputs. Returns ------- + explanation `Explanation` object including `meta` and `data` attributes with integrated gradients attributions - for each feature. + for each feature. See usage at `IG examples`_ for details. + .. _IG examples: + https://docs.seldon.io/projects/alibi/en/latest/methods/IntegratedGradients.html """ # target handling logic if self.target_fn and target is not None: @@ -974,7 +977,7 @@ def explain(self, target, self._is_list) - return self.build_explanation( + return self._build_explanation( X=X, forward_kwargs=forward_kwargs, baselines=baselines, # type: ignore[arg-type] @@ -983,13 +986,13 @@ def explain(self, deltas=deltas ) - def build_explanation(self, - X: Union[List[np.ndarray], np.ndarray], - forward_kwargs: Optional[dict], - baselines: List[np.ndarray], - target: Optional[List[int]], - attributions: Union[List[np.ndarray], List[tf.Tensor]], - deltas: np.ndarray) -> Explanation: + def _build_explanation(self, + X: Union[List[np.ndarray], np.ndarray], + forward_kwargs: Optional[dict], + baselines: List[np.ndarray], + target: Optional[List[int]], + attributions: Union[List[np.ndarray], List[tf.Tensor]], + deltas: np.ndarray) -> Explanation: if forward_kwargs is None: forward_kwargs = {} data = copy.deepcopy(DEFAULT_DATA_INTGRAD) @@ -1007,6 +1010,14 @@ def build_explanation(self, return Explanation(meta=copy.deepcopy(self.meta), data=data) def reset_predictor(self, predictor: Union[tf.keras.Model]) -> None: + """ + Resets the predictor model. + + Parameters + ---------- + predictor + New prediction model. + """ # TODO: check what else should be done (e.g. validate dtypes again?) self.model = predictor @@ -1033,18 +1044,18 @@ def _compute_attributions_list_input(self, step_sizes Weights in the path integral sum. alphas - Interpolation parameter defining the points of the interal path. + Interpolation parameter defining the points of the integral path. nb_samples Total number of samples. forward_kwargs Input keywords args. 
compute_layer_inputs_gradients In case of layers gradients, controls whether the gradients are computed for the layer's inputs or - outputs. If True, gradients are computed for the layer's inputs, if False for the layer's outputs. + outputs. If ``True``, gradients are computed for the layer's inputs, if ``False`` for the layer's outputs. Returns ------- - Tuple with integrated gradients attributions, deltas and predictions + Tuple with integrated gradients attributions, deltas and predictions. """ if forward_kwargs is None: @@ -1157,18 +1168,18 @@ def _compute_attributions_tensor_input(self, step_sizes Weights in the path integral sum. alphas - Interpolation parameter defining the points of the interal path. + Interpolation parameter defining the points of the integral path. nb_samples Total number of samples. forward_kwargs Inputs keywords args. compute_layer_inputs_gradients In case of layers gradients, controls whether the gradients are computed for the layer's inputs or - outputs. If True, gradients are computed for the layer's inputs, if False for the layer's outputs. + outputs. If ``True``, gradients are computed for the layer's inputs, if ``False`` for the layer's outputs. Returns ------- - Tuple with integrated gradients attributions, deltas and predictions + Tuple with integrated gradients attributions, deltas and predictions. """ if forward_kwargs is None: forward_kwargs = {} diff --git a/alibi/explainers/shap_wrappers.py b/alibi/explainers/shap_wrappers.py index 3dd8fe170..97a4bd246 100644 --- a/alibi/explainers/shap_wrappers.py +++ b/alibi/explainers/shap_wrappers.py @@ -38,7 +38,7 @@ def rank_by_importance(shap_values: List[np.ndarray], Parameters ---------- shap_values - Each element corresponds to a samples x features array of shap values corresponding + Each element corresponds to a `samples x features` array of shap values corresponding to each model output. feature_names Each element is the name of the column with the corresponding index in each of the @@ -106,21 +106,21 @@ def rank_by_importance(shap_values: List[np.ndarray], def sum_categories(values: np.ndarray, start_idx: Sequence[int], enc_feat_dim: Sequence[int]): """ - This function is used to reduce specified slices in a two- or three- dimensional tensor. + This function is used to reduce specified slices in a two- or three- dimensional array. - For two-dimensional `values` arrays, for each entry in start_idx, the function sums the - following k columns where k is the corresponding entry in the enc_feat_dim sequence. - The columns whose indices are not in start_idx are left unchanged. This arises when the slices + For two-dimensional `values` arrays, for each entry in `start_idx`, the function sums the + following `k` columns where `k` is the corresponding entry in the `enc_feat_dim` sequence. + The columns whose indices are not in `start_idx` are left unchanged. This arises when the slices contain the shap values for each dimension of an encoded categorical variable and a single shap value for each variable is desired. - For three-dimensional `values` arrays, the reduction is applied for each rank 2 subtensor, first along + For three-dimensional `values` arrays, the reduction is applied for each rank 2 subarray, first along the column dimension and then across the row dimension. This arises when summarising shap interaction values. - Each rank 2 tensor is a E x E matrix of shap interaction values, where E is the dimension of the data after - one-hot encoding. 
The result of applying the reduction yields a rank 2 tensor of dimension F x F, where F is the - number of features (ie, the feature dimension of the data matrix before encoding). By applying this transformation, - a single value describing the interaction of categorical features i and j and a single value describing the - intearction of j and i is returned. + Each rank 2 array is a `E x E` matrix of shap interaction values, where `E` is the dimension of the data after + one-hot encoding. The result of applying the reduction yields a rank 2 array of dimension `F x F`, where `F` is the + number of features (i.e., the feature dimension of the data matrix before encoding). By applying this + transformation, a single value describing the interaction of categorical features i and j and a single value + describing the interaction of `j` and `i` is returned. Parameters ---------- @@ -130,6 +130,7 @@ def sum_categories(values: np.ndarray, start_idx: Sequence[int], enc_feat_dim: S The start indices of the columns to be summed. enc_feat_dim The number of columns to be summed, one for each start index. + Returns ------- new_values @@ -148,15 +149,15 @@ def sum_categories(values: np.ndarray, start_idx: Sequence[int], enc_feat_dim: S if len(values.shape) not in (2, 3): raise ValueError( - f"Shap value summarisation can only be applied to tensors of shap values (dim=2) or shap " - f"interaction values (dim=3). The tensor to be summarised had dimension {values.shape}!" + f"Shap value summarisation can only be applied to arrays of shap values (dim=2) or shap " + f"interaction values (dim=3). The array to be summarised had dimension {values.shape}!" ) def _get_slices(start: Sequence[int], dim: Sequence[int], arr_trailing_dim: int) -> List[int]: """ Given start indices, encoding dimensions and the array trailing shape, this function returns an array where contiguous numbers are slices. This array is used to reduce along an axis - only the slices `slice(start[i], start[i] + dim[i], 1)` from a tensor and leave all other slices + only the slices `slice(start[i], start[i] + dim[i], 1)` from an array and leave all other slices unchanged. """ @@ -209,9 +210,9 @@ def _reduction(arr, axis, indices=None): """ Default distributed options for KernelShap: - - ``'ncpus'``: ``int``, number of available CPUs available to parallelize explanations. Performance is significantly \ - boosted when the number specified represents physical CPUs, but small (nonlinear) gains are observed when virtual \ - CPUs are specified. If set to `None`, the code will run sequentially. + - ``'ncpus'`` : ``int`` - number of available CPUs available to parallelize explanations. Performance \ + is significantly boosted when the number specified represents physical CPUs, but small (nonlinear) gains are \ + observed when virtual CPUs are specified. If set to ``None``, the code will run sequentially. - ``'batch_size'``: ``int``, how many instances are explained in the same remote process at once. The `shap` library \ of KernelShap is not vectorised, so no significant gains are made by specifying batches. See blog `post`_ for batch \ @@ -227,15 +228,17 @@ class KernelExplainerWrapper(KernelExplainer): """ A wrapper around `shap.KernelExplainer` that supports: - - fixing the seed when instantiating the KernelExplainer in a separate process - - passing a batch index to the explainer so that a parallel explainer pool can return batches in arbitrary order + - fixing the seed when instantiating the KernelExplainer in a separate process. 
+ + - passing a batch index to the explainer so that a parallel explainer pool can return batches in \ + arbitrary order. """ def __init__(self, *args, **kwargs): """ Parameters ----------- - args, kwargs + *args, **kwargs Arguments and keyword arguments for `shap.KernelExplainer` constructor. """ @@ -255,7 +258,7 @@ def get_explanation(self, X: Union[Tuple[int, np.ndarray], np.ndarray], **kwargs X When called from a distributed context, it is a tuple containing a batch index and a batch to be explained. Otherwise, it is an array of instances to be explained. - kwargs + **kwargs `shap.KernelExplainer.shap_values` kwarg values. """ @@ -298,20 +301,20 @@ def __init__(self, Parameters ---------- predictor - A callable that takes as an input a samples x features array and outputs a samples x n_outputs - model outputs. The n_outputs should represent model output in margin space. If the model outputs - probabilities, then the link should be set to 'logit' to ensure correct force plots. + A callable that takes as an input a `samples x features` array and outputs a `samples x n_outputs` + model outputs. The `n_outputs` should represent model output in margin space. If the model outputs + probabilities, then the link should be set to ``'logit'`` to ensure correct force plots. link - Valid values are `'identity'` or `'logit'`. A generalized linear model link to connect the feature + Valid values are ``'identity'`` or ``'logit'``. A generalized linear model link to connect the feature importance values to the model output. Since the feature importance values, :math:`\phi`, sum up to the model output, it often makes sense to connect them to the ouput with a link function where :math:`link(output - expected\_value) = sum(\phi)`. Therefore, for a model which outputs probabilities, - `link='logit'` makes the feature effects have log-odds (evidence) units and `link='identity'` means that the - feature effects have probability units. Please see this `example`_ for an in-depth discussion about the - semantics of explaining the model in the probability or margin space. + ``link='logit'`` makes the feature effects have log-odds (evidence) units and ``link='identity'`` means + that the feature effects have probability units. Please see this `example`_ for an in-depth discussion + about the semantics of explaining the model in the probability or margin space. .. _example: - https://github.com/slundberg/shap/blob/master/notebooks/kernel_explainer/Squashing%20Effect.ipynb + https://github.com/slundberg/shap/blob/master/notebooks/tabular_examples/model_agnostic/Squashing%20Effect.ipynb feature_names Used to infer group names when categorical data is treated by grouping and `group_names` input to `fit` @@ -324,13 +327,13 @@ def __init__(self, summarisation (if specified, subsampling is performed as opposed to k-means clustering). In the future it may be used for visualisation. task - Can have values `'classification'` and `'regression'`. It is only used to set the contents of + Can have values ``'classification'`` and ``'regression'``. It is only used to set the contents of `explanation.data['raw']['prediction']` seed Fixes the random number stream, which influences which subsets are sampled during shap value estimation. distributed_opts - A dictionary that controls the algorithm distributed execution. See `DISTRIBUTED_OPTS` documentation for - details. + A dictionary that controls the algorithm distributed execution. + See :py:data:`alibi.explainers.shap_wrappers.DISTRIBUTED_OPTS` documentation for details. 
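        A minimal usage sketch (illustrative only; it assumes a fitted scikit-learn classifier `clf`,
        a list `feature_names` and arrays `X_train`/`X_test`):

            from alibi.explainers import KernelShap

            predictor = clf.predict_proba                # probabilities, hence link='logit'
            explainer = KernelShap(predictor, link='logit', feature_names=feature_names)
            explainer.fit(X_train[:100])                 # background dataset
            explanation = explainer.explain(X_test[:5], nsamples=500)
            shap_values = explanation.shap_values        # one array per model output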
""" # noqa W605 super().__init__(meta=copy.deepcopy(DEFAULT_META_KERNEL_SHAP)) @@ -503,15 +506,15 @@ def _summarise_background(self, n_background_samples: int) -> \ Union[shap_utils.Data, pd.DataFrame, np.ndarray, sparse.spmatrix]: """ - Summarises the background data to n_background_samples in order to reduce the computational cost. If the + Summarises the background data to `n_background_samples` in order to reduce the computational cost. If the background data is a `shap_utils.Data object`, no summarisation is performed. Returns ------- - If the user has specified grouping, then the input object is subsampled and an object of the same - type is returned. Otherwise, a `shap_utils.Data` object containing the result of a k-means algorithm - is wrapped in a `shap_utils.DenseData` object and returned. The samples are weighted according to the - frequency of the occurrence of the clusters in the original data. + If the user has specified grouping, then the input object is subsampled and an object of the same \ + type is returned. Otherwise, a `shap_utils.Data` object containing the result of a k-means algorithm \ + is wrapped in a `shap_utils.DenseData` object and returned. The samples are weighted according to the \ + frequency of the occurrence of the clusters in the original data. """ if isinstance(background_data, shap_utils.Data): @@ -560,8 +563,7 @@ def _(self, background_data, *args, **kwargs) -> shap_utils.Data: Notes _____ - - If `self.summarise_background = True`, then a `shap_utils.Data` object is + If ``self.summarise_background=True``, then a `shap_utils.Data` object is returned if the user passed a `shap_utils.Data` object to `fit` or didn't specify groups. """ @@ -694,13 +696,13 @@ def fit(self, # type: ignore background data should represent samples and the columns features. summarise_background A large background dataset impacts the runtime and memory footprint of the algorithm. By setting - this argument to `True`, only `n_background_samples` from the provided data are selected. If group_names or - groups arguments are specified, the algorithm assumes that the data contains categorical variables so - the records are selected uniformly at random. Otherwise, `shap.kmeans` (a wrapper around `sklearn` k-means - implementation) is used for selection. If set to `'auto'`, a default of + this argument to ``True``, only `n_background_samples` from the provided data are selected. If + group_names or groups arguments are specified, the algorithm assumes that the data contains categorical + variables so the records are selected uniformly at random. Otherwise, `shap.kmeans` (a wrapper + around `sklearn` k-means implementation) is used for selection. If set to ``'auto'``, a default of `KERNEL_SHAP_BACKGROUND_THRESHOLD` samples is selected. n_background_samples - The number of samples to keep in the background dataset if `summarise_background=True`. + The number of samples to keep in the background dataset if ``summarise_background=True``. groups: A list containing sub-lists specifying the indices of features belonging to the same group. group_names: @@ -710,7 +712,7 @@ def fit(self, # type: ignore weights: A sequence or array of weights. This is used only if grouping is specified and assigns a weight to each point in the dataset. - kwargs: + **kwargs: Expected keyword arguments include `keep_index` (bool) and should be used if a data frame containing an index column is passed to the algorithm. 
""" @@ -802,19 +804,19 @@ def explain(self, cat_vars_enc_dim The length of the encoding dimension for each categorical variable. If specified `cat_vars_start_idx` should also be specified. - kwargs + **kwargs Keyword arguments specifying explain behaviour. Valid arguments are: - - `nsamples`: controls the number of predictor calls and therefore runtime. - - - `l1_reg`: the algorithm is exponential in the feature dimension. If set to `auto` the algorithm will \ + - `nsamples` - controls the number of predictor calls and therefore runtime. + + - `l1_reg` - the algorithm is exponential in the feature dimension. If set to `auto` the algorithm will \ first run a feature selection algorithm to select the top features, provided the fraction of sampled \ sets of missing features is less than 0.2 from the number of total subsets. The Akaike Information \ Criterion is used in this case. See our examples for more details about available settings for this \ parameter. Note that by first running a feature selection step, the shapley values of the remainder of \ the features will be different to those estimated from the entire set. - For more details, please see the shap library `documentation`_ . + For more details, please see the shap library `documentation`_ . .. _documentation: https://shap.readthedocs.io/en/latest/. @@ -823,9 +825,9 @@ def explain(self, ------- explanation An explanation object containing the shap values and prediction in the `data` field, along with a `meta` - field containing additional data. See usage `examples`_ for details. + field containing additional data. See usage at `KernelSHAP examples`_ for details. - .. _examples: + .. _KernelSHAP examples: https://docs.seldon.io/projects/alibi/en/latest/methods/KernelSHAP.html """ # noqa W605 @@ -857,7 +859,7 @@ def explain(self, # else than a list of objects (because it's a generic class). An API update is necessary in order to seamlessly # deal with this. Ignoring with the assumption that this feature is WIP and will not be used for now # (aka, return_generator=True is not passed to the DistributedExplainer) - explanation = self.build_explanation( + explanation = self._build_explanation( X, shap_values, # type: ignore expected_value, @@ -868,11 +870,11 @@ def explain(self, return explanation - def build_explanation(self, - X: Union[np.ndarray, pd.DataFrame, sparse.spmatrix], - shap_values: List[np.ndarray], - expected_value: List[float], - **kwargs) -> Explanation: + def _build_explanation(self, + X: Union[np.ndarray, pd.DataFrame, sparse.spmatrix], + shap_values: List[np.ndarray], + expected_value: List[float], + **kwargs) -> Explanation: """ Create an explanation object. If output summarisation is required and all inputs necessary for this operation are passed, the raw shap values are summed first so that a single shap value is returned for each categorical @@ -883,20 +885,20 @@ def build_explanation(self, X Instances to be explained. shap_values - Each entry is a n_instances x n_features array, and the length of the list equals the dimensionality + Each entry is a `n_instances x n_features array`, and the length of the list equals the dimensionality of the predictor output. The rows of each array correspond to the shap values for the instances with the corresponding row index in `X`. The length of the list equals the number of model outputs. expected_value - A list containing the expected value of the prediction for each class. Its length should be equal to that of - `shap_values`. 
+ A list containing the expected value of the prediction for each class. Its length should be equal to + that of `shap_values`. Returns ------- explanation An explanation object containing the shap values and prediction in the `data` field, along with a `meta` - field containing additional data. See usage `examples`_ for details. + field containing additional data. See usage at `KernelSHAP examples`_ for details. - .. _examples: + .. _KernelSHAP examples: https://docs.seldon.io/projects/alibi/en/latest/methods/KernelSHAP.html """ @@ -956,12 +958,8 @@ def _check_result_summarisation(self, Parameters ---------- - summarise_result: - See `explain` documentation. - cat_vars_start_idx: - See `explain` documentation. - cat_vars_enc_dim: - See `explain` documentation. + summarise_result, cat_vars_start_idx, cat_vars_enc_dim + See :py:meth:`alibi.exapliners.KernelShap.shap_wrapper.explain` documentation. """ self.summarise_result = summarise_result @@ -982,6 +980,14 @@ def _check_result_summarisation(self, self.summarise_result = False def reset_predictor(self, predictor: Callable) -> None: + """ + Resets the prediction function. + + Parameters + ---------- + predictor + New prediction function. + """ self.predictor = predictor # TODO: check if we need to reinitialize self._explainer (potentially not, as it should hold a reference # to self.predictor) however, the shap.KernelExplainer may utilize the Callable to set some attributes @@ -1016,41 +1022,41 @@ def __init__(self, Parameters ---------- predictor - A fitted model to be explained. XGBoost, LightGBM, CatBoost and most tree-based - scikit-learn models are supported. In the future, Pyspark could also be supported. + A fitted model to be explained. `XGBoost`, `LightGBM`, `CatBoost` and most tree-based + `scikit-learn` models are supported. In the future, `Pyspark` could also be supported. Please open an issue if this is a use case for you. model_output - Supported values are: `'raw'`, `'probability'`, `'probability_doubled'`, `'log_loss'`: + Supported values are: ``'raw'``, ``'probability'``, ``'probability_doubled'``, ``'log_loss'``: - - `'raw'`: the raw model of the output, which varies by task, is explained. This option \ + - ``'raw'`` - the raw model of the output, which varies by task, is explained. This option \ should always be used if the `fit` is called without arguments. It should also be set to compute \ shap interaction values. For regression models it is the standard output, for binary classification \ - in XGBoost it is the log odds ratio. \ + in `XGBoost` it is the log odds ratio. \ - - `'probability'`: the probability output is explained. This option should only be used if `fit` was \ + - ``'probability'`` - the probability output is explained. This option should only be used if `fit` \ was called with the `background_data` argument set. The effect of specifying this parameter is that \ - the `shap` library will use this information to transform the shap values computed in margin space (aka \ - using the raw output) to shap values that sum to the probability output by the model plus the model expected \ - output probability. 
This requires knowledge of the type of output for `predictor` which is inferred by the \ - `shap` library from the model type (e.g., most sklearn models with exception of \ - `sklearn.tree.DecisionTreeClassifier`, `sklearn.ensemble.RandomForestClassifier`, \ + the `shap` library will use this information to transform the shap values computed in margin space \ + (aka using the raw output) to shap values that sum to the probability output by the model plus the \ + model expected output probability. This requires knowledge of the type of output for `predictor` \ + which is inferred by the `shap` library from the model type (e.g., most sklearn models with exception \ + of `sklearn.tree.DecisionTreeClassifier`, `sklearn.ensemble.RandomForestClassifier`, \ `sklearn.ensemble.ExtraTreesClassifier` output logits) or on the basis of the mapping implemented in \ the `shap.TreeEnsemble` constructor. Only trees that output log odds and probabilities are supported \ currently. - - `'probability_doubled'`: used for binary classification problem in situations where the model outputs \ - the logits/probabilities for the positive class but shap values for both outcomes are desired. This \ - option should be used only if `fit` was called with the `background_data` argument set. In \ + - ``'probability_doubled'`` - used for binary classification problem in situations where the model \ + outputs the logits/probabilities for the positive class but shap values for both outcomes are desired. \ + This option should be used only if `fit` was called with the `background_data` argument set. In \ this case the expected value for the negative class is 1 - expected_value for positive class and \ the shap values for the negative class are the negative values of the positive class shap values. \ As before, the explanation happens in the margin space, and the shap values are subsequently adjusted. \ convert the model output to probabilities. The same considerations as for `probability` apply for this \ output type too. - - `'log_loss'`: logarithmic loss is explained. This option shoud be used only if `fit` was called with the \ - `background_data` argument set and requires specifying labels, `y`, when calling `explain`. If the \ - objective is squared error, then the transformation :math:`(output - y)^2` is applied. For binary \ - cross-entropy objective, the transformation :math:`log(1 + exp(output)) - y * output` with \ + - ``'log_loss'`` - logarithmic loss is explained. This option shoud be used only if `fit` was called \ + with the `background_data` argument set and requires specifying labels, `y`, when calling `explain`. \ + If the objective is squared error, then the transformation :math:`(output - y)^2` is applied. For \ + binary cross-entropy objective, the transformation :math:`log(1 + exp(output)) - y * output` with \ :math:`y \in \{0, 1\}`. Currently only binary cross-entropy and squared error losses can be explained. \ feature_names @@ -1061,7 +1067,7 @@ def __init__(self, for the feature. Used to select the method for background data summarisation (if specified, subsampling is performed as opposed to kmeans clustering). In the future it may be used for visualisation. task - Can have values `'classification'` and `'regression'`. It is only used to set the contents of the + Can have values ``'classification'`` and ``'regression'``. It is only used to set the contents of the `prediction` field in the `data['raw']` response field. Notes @@ -1070,7 +1076,7 @@ def __init__(self, (the entire real line). 
For discussion related to explaining models in output vs probability space, please consult this resource_. - .. _resource: https://github.com/slundberg/shap/blob/master/notebooks/kernel_explainer/Squashing%20Effect.ipynb + .. _resource: https://github.com/slundberg/shap/blob/master/notebooks/tabular_examples/model_agnostic/Squashing%20Effect.ipynb """ # noqa W605 super().__init__(meta=copy.deepcopy(DEFAULT_META_TREE_SHAP)) @@ -1104,9 +1110,9 @@ def fit(self, # type: ignore[override] This function instantiates an explainer which can then be use to explain instances using the `explain` method. If no background dataset is passed, the explainer uses the path-dependent feature perturbation algorithm to explain the values. As such, only the model raw output can be explained and this should be reflected by - passing `model_output='raw'` when instantiating the explainer. If a background dataset is passed, the + passing ``model_output='raw'`` when instantiating the explainer. If a background dataset is passed, the interventional feature perturbation algorithm is used. Using this algorithm, probability outputs can also be - explained. Additionally, if the `model_output='log_loss'` option is passed to the explainer constructor, then + explained. Additionally, if the ``model_output='log_loss'`` option is passed to the explainer constructor, then the model loss function can be explained by passing the labels as the `y` argument to the explain method. A limited number of loss functions are supported, as detailed in the constructor documentation. @@ -1117,12 +1123,12 @@ def fit(self, # type: ignore[override] background data should represent samples and the columns features. summarise_background A large background dataset may impact the runtime and memory footprint of the algorithm. By setting - this argument to `True`, only `n_background_samples` from the provided data are selected. If the + this argument to ``True``, only `n_background_samples` from the provided data are selected. If the `categorical_names` argument has been passed to the constructor, subsampling of the data is used. Otherwise, `shap.kmeans` (a wrapper around `sklearn.kmeans` implementation) is used for selection. - If set to `'auto'`, a default of `TREE_SHAP_BACKGROUND_WARNING_THRESHOLD` samples is selected. + If set to ``'auto'``, a default of `TREE_SHAP_BACKGROUND_WARNING_THRESHOLD` samples is selected. n_background_samples - The number of samples to keep in the background dataset if `summarise_background=True`. + The number of samples to keep in the background dataset if ``summarise_background=True``. """ np.random.seed(self.seed) @@ -1206,9 +1212,9 @@ def _summarise_background(self, Returns ------- - If the `categorical_names` argument to the constructor is specified, then an object of the same type as - input containing only `n_background_samples` is returned. Otherwise, a `shap_utils.Data` containing an - `np.ndarray` object of `n_background_samples` in the `data` field is returned. + If the `categorical_names` argument to the constructor is specified, then an object of the same type as + input containing only `n_background_samples` is returned. Otherwise, a `shap_utils.Data` containing an + `np.ndarray` object of `n_background_samples` in the `data` field is returned. """ @@ -1244,11 +1250,11 @@ def explain(self, Labels corresponding to rows of `X`. Should be passed only if a background dataset was passed to the `fit` method. 
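To make the fit-time distinction documented above concrete, here is a minimal usage sketch (the model and data are illustrative; the constructor, `fit` and `explain` signatures follow the docstrings in this patch, and whether non-raw outputs are accepted for a given model type is decided by the `shap` library as noted above)::

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from alibi.explainers import TreeShap

    X, y = load_iris(return_X_y=True)
    clf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)

    # Path-dependent feature perturbation: fit() without a background dataset,
    # so only the raw model output is explained (model_output='raw').
    explainer = TreeShap(clf, model_output='raw', task='classification')
    explainer.fit()
    explanation = explainer.explain(X[:5])

    # Interventional feature perturbation: pass a background dataset to fit();
    # per the docstring above this also enables probability/log-loss outputs.
    explainer_int = TreeShap(clf, model_output='raw', task='classification')
    explainer_int.fit(background_data=X[:100])
    explanation_int = explainer_int.explain(X[:5])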
interactions - If `True`, the shap value for every feature of every instance in `X` is decomposed into + If ``True``, the shap value for every feature of every instance in `X` is decomposed into `X.shape[1] - 1` shap value interactions and one main effect. This is only supported if `fit` is called with `background_dataset=None`. approximate - If `True`, an approximation to the shap values that does not account for feature order is computed. This + If ``True``, an approximation to the shap values that does not account for feature order is computed. This was proposed by `Ando Sabaas`_ here . Check `this`_ resource for more details. This option is currently only supported for `xgboost` and `sklearn` models. @@ -1259,11 +1265,11 @@ def explain(self, https://static-content.springer.com/esm/art%3A10.1038%2Fs42256-019-0138-9/MediaObjects/42256_2019_138_MOESM1_ESM.pdf check_additivity - If `True`, output correctness is ensured if `model_output='raw'` has been passed to the constructor. + If ``True``, output correctness is ensured if ``model_output='raw'`` has been passed to the constructor. tree_limit Explain the output of a subset of the first `tree_limit` trees in an ensemble model. summarise_result - This should be set to True only when some of the columns in `X` represent encoded dimensions of a + This should be set to ``True`` only when some of the columns in `X` represent encoded dimensions of a categorical variable and one single shap value per categorical variable is desired. Both `cat_vars_start_idx` and `cat_vars_enc_dim` should be specified as detailed below to allow this. cat_vars_start_idx @@ -1275,9 +1281,9 @@ def explain(self, ------- explanation An `Explanation` object containing the shap values and prediction in the `data` field, along with a - `meta` field containing additional data. See usage examples `here`_ for details. + `meta` field containing additional data. See usage at `TreeSHAP examples`_ for details. - .. _here: + .. _TreeSHAP examples: https://docs.seldon.io/projects/alibi/en/latest/methods/TreeSHAP.html """ # noqa: E501 @@ -1323,7 +1329,7 @@ def explain(self, params=True ) - explanation = self.build_explanation( + explanation = self._build_explanation( X, shap_output, expected_value, @@ -1338,8 +1344,8 @@ def explain(self, def _xgboost_interactions(self, X: Union[np.ndarray, pd.DataFrame]) -> Union[np.ndarray, List[np.ndarray]]: """ `shap` library handling of `xgboost` causes a `ValueError` due to `xgboost` (features name mismatch) - if you call `shap_interaction_values` with a numpy array (likely only if the user declares their - `xgboost.DMatrix` object with the feature_names keyword argument). This method converts the + if you call `shap_interaction_values` with a `numpy` array (likely only if the user declares their + `xgboost.DMatrix` object with the `feature_names` keyword argument). This method converts the incoming numpy array to an `xgboost.DMatrix` object with feature names that match the predictor. """ @@ -1374,7 +1380,7 @@ def _check_interactions(self, approximate: bool, Warns ----- - If approximate values are requested. These values are not defined for interactions. + If approximate values are requested. These values are not defined for interactions. """ self.approximate = approximate @@ -1412,7 +1418,6 @@ def _check_explainer_setup(self, y See `explain` method documentation. 
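As an illustration of how `summarise_result`, `cat_vars_start_idx` and `cat_vars_enc_dim` are meant to describe one-hot encoded blocks, consider the following hypothetical column layout (the call is sketched, not taken from the library's examples)::

    # Hypothetical layout: columns 0-1 numerical, columns 2-4 one-hot 'colour'
    # (3 categories), columns 5-6 one-hot 'size' (2 categories).
    cat_vars_start_idx = [2, 5]  # first encoded column of each categorical variable
    cat_vars_enc_dim = [3, 2]    # number of encoded columns per categorical variable

    # explanation = explainer.explain(
    #     X,
    #     summarise_result=True,
    #     cat_vars_start_idx=cat_vars_start_idx,
    #     cat_vars_enc_dim=cat_vars_enc_dim,
    # )  # yields one shap value per categorical variable instead of one per encoded column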
- Raises ------ NotImplementedError @@ -1446,11 +1451,11 @@ def _check_explainer_setup, f"explainer with the desired model output option and then call fit(background_data=my_data)!" ) - def build_explanation(self, - X: Union[np.ndarray, pd.DataFrame, 'catboost.Pool'], - shap_output: List[np.ndarray], - expected_value: List[float], - **kwargs) -> Explanation: + def _build_explanation(self, + X: Union[np.ndarray, pd.DataFrame, 'catboost.Pool'], + shap_output: List[np.ndarray], + expected_value: List[float], + **kwargs) -> Explanation: """ Create an explanation object. If output summarisation is required and all inputs necessary for this operation @@ -1465,8 +1470,8 @@ def build_explanation(self, X Instances to be explained. shap_output - If `explain` is callled with `interactions=True` then the list contains tensors of dimensionality - `n_instances x n_features x n_features` of shap interaction values. Otherwise, it contains tensors of + If `explain` is called with ``interactions=True`` then the list contains arrays of dimensionality + `n_instances x n_features x n_features` of shap interaction values. Otherwise, it contains arrays of dimension `n_instances x n_features` representing shap values. The length of the list equals the number of model outputs. expected_value @@ -1477,9 +1482,9 @@ def build_explanation(self, ------- explanation An `Explanation` object containing the shap values and prediction in the `data` field, along with a - `meta` field containing additional data. See usage examples `here`_ for details. + `meta` field containing additional data. See usage at `TreeSHAP examples`_ for details. - .. _here: + .. _TreeSHAP examples: https://docs.seldon.io/projects/alibi/en/latest/methods/TreeSHAP.html """ @@ -1577,12 +1582,8 @@ def _check_result_summarisation(self, Parameters ---------- - summarise_result: - See `explain` documentation. - cat_vars_start_idx: - See `explain` documentation. - cat_vars_enc_dim: - See `explain` documentation. + summarise_result, cat_vars_start_idx, cat_vars_enc_dim + See :py:meth:`alibi.explainers.shap_wrappers.TreeShap.explain` documentation. """ self.summarise_result = summarise_result @@ -1595,5 +1596,13 @@ def _check_result_summarisation(self, self.summarise_result = False def reset_predictor(self, predictor: Any) -> None: + """ + Resets the predictor. + + Parameters + ---------- + predictor + New predictor. + """ # TODO: check what else should be done (e.g. validate dtypes again?)
self.predictor = predictor diff --git a/alibi/explainers/tests/test_shap_wrappers.py b/alibi/explainers/tests/test_shap_wrappers.py index e847996fc..74b6b425b 100644 --- a/alibi/explainers/tests/test_shap_wrappers.py +++ b/alibi/explainers/tests/test_shap_wrappers.py @@ -1058,7 +1058,7 @@ def test_build_explanation_kernel(mock_kernel_shap_explainer, task): X = get_random_matrix(n_rows=n_instances, n_cols=n_feats) shap_values = [get_random_matrix(n_rows=n_instances, n_cols=n_feats) for _ in range(n_outs)] expected_value = [np.random.random() for _ in range(n_outs)] - response = explainer.build_explanation(X, shap_values, expected_value) + response = explainer._build_explanation(X, shap_values, expected_value) if task == 'regression': assert not response.data['raw']['prediction'] @@ -1307,7 +1307,7 @@ def test_explain_tree(caplog, monkeypatch, mock_tree_shap_explainer, data_type, # TODO: @janis: let's do path.multiple or something like that with unittest.mock.patch.object(explainer, '_check_interactions'): with unittest.mock.patch.object(explainer, '_check_explainer_setup'): - with unittest.mock.patch.object(explainer, 'build_explanation'): + with unittest.mock.patch.object(explainer, '_build_explanation'): # explain some instances explainer.explain( @@ -1325,8 +1325,8 @@ def test_explain_tree(caplog, monkeypatch, mock_tree_shap_explainer, data_type, explainer._check_interactions.asert_not_called() explainer._check_explainer_setup.assert_called_with(background_data, explainer.model_output, None) - explainer.build_explanation.assert_called_once() - build_args = explainer.build_explanation.call_args + explainer._build_explanation.assert_called_once() + build_args = explainer._build_explanation.call_args # check shap values and expected value are of the correct data type for all dimensions assert isinstance(build_args[0][1], list) assert isinstance(build_args[0][2], list) @@ -1447,7 +1447,7 @@ def uncollect_if_test_tree_api(**kwargs): data_type = kwargs['data_type'] interactions = kwargs['interactions'] - # exclude this as the code would raise a value error before calling build_explanation + # exclude this as the code would raise a value error before calling _build_explanation conditions = [ labels and model_output != 'log_loss', labels and data_type == 'none', diff --git a/alibi/models/pytorch/actor_critic.py b/alibi/models/pytorch/actor_critic.py index bfafc52ba..38639c5c2 100644 --- a/alibi/models/pytorch/actor_critic.py +++ b/alibi/models/pytorch/actor_critic.py @@ -35,6 +35,18 @@ def __init__(self, hidden_dim: int, output_dim: int) -> None: self.fc3 = nn.LazyLinear(output_dim) def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + Continuous action. + """ x = F.relu(self.ln1(self.fc1(x))) x = F.relu(self.ln2(self.fc2(x))) x = torch.tanh(self.fc3(x)) @@ -65,6 +77,18 @@ def __init__(self, hidden_dim: int): self.fc3 = nn.LazyLinear(1) def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + Critic value. 
+ """ x = F.relu(self.ln1(self.fc1(x))) x = F.relu(self.ln2(self.fc2(x))) x = self.fc3(x) diff --git a/alibi/models/pytorch/autoencoder.py b/alibi/models/pytorch/autoencoder.py index 4bcfc8d3f..ea7ae07f3 100644 --- a/alibi/models/pytorch/autoencoder.py +++ b/alibi/models/pytorch/autoencoder.py @@ -8,7 +8,7 @@ import torch.nn as nn from alibi.models.pytorch.model import Model -from typing import List +from typing import List, Union class AE(Model): @@ -28,9 +28,9 @@ def __init__(self, Parameters ---------- encoder - Encoder network + Encoder network. decoder - Decoder network + Decoder network. """ super().__init__(**kwargs) @@ -41,7 +41,20 @@ def __init__(self, # send to device self.to(self.device) - def forward(self, x: torch.Tensor): + def forward(self, x: torch.Tensor) -> Union[torch.Tensor, List[torch.Tensor]]: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + x_hat + Reconstruction of the input tensor. + """ z = self.encoder(x) x_hat = self.decoder(z) return x_hat @@ -74,6 +87,20 @@ def __init__(self, self.to(self.device) def forward(self, x: torch.Tensor) -> List[torch.Tensor]: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + List of reconstruction of the input tensor. First element corresponds to the reconstruction of all the \ + numerical features if they exist, and the rest of the elements correspond to each categorical feature. + + """ x_hat = super().forward(x) # TODO: think of a better way to do the check, or maybe just remove it since return type hints diff --git a/alibi/models/pytorch/cfrl_models.py b/alibi/models/pytorch/cfrl_models.py index 930b30c5c..d32ce0aa3 100644 --- a/alibi/models/pytorch/cfrl_models.py +++ b/alibi/models/pytorch/cfrl_models.py @@ -44,6 +44,18 @@ def __init__(self, output_dim: int) -> None: self.to(self.device) def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + Classification logits. + """ x = self.dropout1(self.maxpool1(F.relu(self.conv1(x)))) x = self.dropout2(self.maxpool2(F.relu(self.conv2(x)))) x = self.flatten(x) @@ -83,6 +95,18 @@ def __init__(self, latent_dim: int): self.fc1 = nn.Linear(8 * 4 * 4, latent_dim) def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + Encoding representation having each component in the interval [-1, 1] + """ x = self.maxpool1(F.relu(self.conv1(x))) x = self.maxpool2(F.relu(self.conv2(x))) x = self.maxpool3(F.relu(self.conv3(x))) @@ -121,6 +145,18 @@ def __init__(self, latent_dim: int): self.conv4 = nn.Conv2d(16, 1, kernel_size=(3, 3), padding=1) def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + Decoded input having each component in the interval [0, 1]. + """ x = F.relu(self.fc1(x)) x = x.view(x.shape[0], 8, 4, 4) x = self.up1(F.relu(self.conv1(x))) @@ -154,6 +190,18 @@ def __init__(self, hidden_dim: int, latent_dim: int): self.fc2 = nn.LazyLinear(latent_dim) def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. 
+ + Returns + ------- + Encoding representation having each component in the interval [-1, 1] + """ x = F.relu(self.fc1(x)) x = torch.tanh(self.fc2(x)) return x @@ -182,6 +230,19 @@ def __init__(self, hidden_dim: int, output_dims: List[int]): self.fcs = nn.ModuleList([nn.LazyLinear(dim) for dim in output_dims]) def forward(self, x: torch.Tensor) -> List[torch.Tensor]: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + List of reconstruction of the input tensor. First element corresponds to the reconstruction of all the \ + numerical features if they exist, and the rest of the elements correspond to each categorical feature. + """ x = F.relu(self.fc1(x)) xs = [fc(x) for fc in self.fcs] return xs diff --git a/alibi/models/pytorch/metrics.py b/alibi/models/pytorch/metrics.py index f9b44916a..92f7e561c 100644 --- a/alibi/models/pytorch/metrics.py +++ b/alibi/models/pytorch/metrics.py @@ -15,8 +15,8 @@ class Reduction(Enum): """ Reduction operation supported by the monitoring metrics. """ - SUM = 'sum' - MEAN = 'mean' + SUM: str = 'sum' + MEAN: str = 'mean' class LossContainer: @@ -47,6 +47,10 @@ def __call__(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor: Prediction labels. y_true True labels. + + Returns + ------- + Loss value. """ # compute loss diff --git a/alibi/models/pytorch/model.py b/alibi/models/pytorch/model.py index e5702fbdb..2538ff677 100644 --- a/alibi/models/pytorch/model.py +++ b/alibi/models/pytorch/model.py @@ -28,7 +28,7 @@ def compile(self, metrics: Optional[List[Metric]] = None): """ Compiles a model by setting the optimizer and the loss functions, loss weights and metrics to monitor - the training of the model.. + the training of the model. Parameters ---------- @@ -118,8 +118,8 @@ def compute_loss(self, Returns ------- - A tuple consisting of the total loss computed as a weighted sum of individual losses and a dictionary \ - of individual losses used of logging. + A tuple consisting of the total loss computed as a weighted sum of individual losses and a dictionary \ + of individual losses used of logging. """ # compute loss if isinstance(self.loss, list): @@ -266,7 +266,7 @@ def fit(self, trainloader: DataLoader, epochs: int) -> Dict[str, float]: Returns ------- - Final epoch monitoring metrics. + Final epoch monitoring metrics. """ for epoch in range(epochs): print("Epoch %d/%d" % (epoch, epochs)) @@ -299,7 +299,7 @@ def evaluate(self, testloader: DataLoader) -> Dict[str, float]: Returns ------- - Evaluation metrics. + Evaluation metrics. """ self._reset_loss() self._reset_metrics() @@ -329,7 +329,7 @@ def _metrics_to_str(metrics: Dict[str, float]) -> str: Returns ------- - String representation of the metrics. + String representation of the metrics. """ str_losses = '' for key in metrics: diff --git a/alibi/models/tensorflow/actor_critic.py b/alibi/models/tensorflow/actor_critic.py index c8024e80f..8bed0f72a 100644 --- a/alibi/models/tensorflow/actor_critic.py +++ b/alibi/models/tensorflow/actor_critic.py @@ -34,6 +34,20 @@ def __init__(self, hidden_dim: int, output_dim: int, **kwargs): self.fc3 = keras.layers.Dense(output_dim) def call(self, x: tf.Tensor, **kwargs) -> tf.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + **kwargs + Other arguments. Not used. + + Returns + ------- + Continuous action. 
+ """ x = tf.nn.relu(self.ln1(self.fc1(x))) x = tf.nn.relu(self.ln2(self.fc2(x))) x = tf.nn.tanh(self.fc3(x)) @@ -51,6 +65,8 @@ def __init__(self, hidden_dim: int, **kwargs): """ Constructor. + Parameters + ---------- hidden_dim Hidden dimension. """ @@ -62,6 +78,18 @@ def __init__(self, hidden_dim: int, **kwargs): self.fc3 = keras.layers.Dense(1) def call(self, x: tf.Tensor, **kwargs) -> tf.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + + Returns + ------- + Critic value. + """ x = tf.nn.relu(self.ln1(self.fc1(x))) x = tf.nn.relu(self.ln2(self.fc2(x))) x = self.fc3(x) diff --git a/alibi/models/tensorflow/autoencoder.py b/alibi/models/tensorflow/autoencoder.py index 01e0cc756..91d850c7d 100644 --- a/alibi/models/tensorflow/autoencoder.py +++ b/alibi/models/tensorflow/autoencoder.py @@ -6,7 +6,7 @@ import tensorflow as tf import tensorflow.keras as keras -from typing import List, Tuple +from typing import List, Tuple, Union class AE(keras.Model): @@ -34,7 +34,23 @@ def __init__(self, self.encoder = encoder self.decoder = decoder - def call(self, x: tf.Tensor, **kwargs): + def call(self, x: tf.Tensor, **kwargs) -> Union[tf.Tensor, List[tf.Tensor]]: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + **kwargs + Other arguments passed to encoder/decoder `call` method. + + Returns + ------- + x_hat + Reconstruction of the input tensor. + """ + z = self.encoder(x, **kwargs) x_hat = self.decoder(z, **kwargs) return x_hat @@ -63,7 +79,15 @@ def __init__(self, """ super().__init__(encoder=encoder, decoder=decoder, **kwargs) - def build(self, input_shape: Tuple[int, ...]): + def build(self, input_shape: Tuple[int, ...]) -> None: + """ + Build method. + + Parameters + ---------- + input_shape + Tensor's input shape. + """ super().build(input_shape) # Check if the output is a list @@ -74,4 +98,19 @@ def build(self, input_shape: Tuple[int, ...]): raise ValueError("The output of HeAE should be a list.") def call(self, x: tf.Tensor, **kwargs) -> List[tf.Tensor]: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + **kwargs + Other arguments passed to the encoder/decoder. + + Returns + -------- + List of reconstruction of the input tensor. First element corresponds to the reconstruction of all the \ + numerical features if they exist, and the rest of the elements correspond to each categorical feature. + """ return super().call(x, **kwargs) diff --git a/alibi/models/tensorflow/cfrl_models.py b/alibi/models/tensorflow/cfrl_models.py index 256ec46a5..79e7a5d6a 100644 --- a/alibi/models/tensorflow/cfrl_models.py +++ b/alibi/models/tensorflow/cfrl_models.py @@ -38,6 +38,22 @@ def __init__(self, output_dim: int = 10, **kwargs) -> None: self.fc2 = keras.layers.Dense(output_dim) def call(self, x: tf.Tensor, training: bool = True, **kwargs) -> tf.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + training + Training flag. + **kwargs + Other arguments. Not used. + + Returns + ------- + Classification logits. + """ x = self.dropout1(self.maxpool1(self.conv1(x)), training=training) x = self.dropout2(self.maxpool2(self.conv2(x)), training=training) x = self.fc2(self.fc1(self.flatten(x))) @@ -75,6 +91,20 @@ def __init__(self, latent_dim: int, **kwargs) -> None: self.fc1 = keras.layers.Dense(latent_dim, activation='tanh') def call(self, x: tf.Tensor, **kwargs) -> tf.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + **kwargs + Other arguments. Not used. 
+ + Returns + ------- + Encoding representation having each component in the interval [-1, 1] + """ x = self.maxpool1(self.conv1(x)) x = self.maxpool2(self.conv2(x)) x = self.maxpool3(self.conv3(x)) @@ -106,6 +136,20 @@ def __init__(self, **kwargs) -> None: self.conv4 = keras.layers.Conv2D(1, (3, 3), padding="same", activation="sigmoid") def call(self, x: tf.Tensor, **kwargs) -> tf.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor + **kwargs + Other arguments. Not used. + + Returns + ------- + Decoded input having each component in the interval [0, 1]. + """ x = self.reshape(self.fc1(x)) x = self.up1(self.conv1(x)) x = self.up2(self.conv2(x)) @@ -138,6 +182,20 @@ def __init__(self, hidden_dim: int, latent_dim: int, **kwargs): self.fc2 = keras.layers.Dense(latent_dim) def call(self, x: tf.Tensor, **kwargs) -> tf.Tensor: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + **kwargs + Other arguments. + + Returns + ------- + Encoding representation having each component in the interval [-1, 1]. + """ x = tf.nn.relu(self.fc1(x)) x = tf.nn.tanh(self.fc2(x)) return x @@ -167,6 +225,21 @@ def __init__(self, hidden_dim: int, output_dims: List[int], **kwargs): self.fcs = [keras.layers.Dense(dim) for dim in output_dims] def call(self, x: tf.Tensor, **kwargs) -> List[tf.Tensor]: + """ + Forward pass. + + Parameters + ---------- + x + Input tensor. + **kwargs + Other arguments. Not used. + + Returns + ------- + List of reconstruction of the input tensor. First element corresponds to the reconstruction of all the \ + numerical features if they exist, and the rest of the elements correspond to each categorical feature. + """ x = tf.nn.relu(self.fc1(x)) xs = [fc(x) for fc in self.fcs] return xs diff --git a/alibi/utils/approximation_methods.py b/alibi/utils/approximation_methods.py index 492a0cce5..17ac0a52d 100644 --- a/alibi/utils/approximation_methods.py +++ b/alibi/utils/approximation_methods.py @@ -19,6 +19,9 @@ class Riemann(Enum): "riemann_middle", "riemann_trapezoid", ] +""" +Riemann integration methods. +""" SUPPORTED_METHODS = SUPPORTED_RIEMANN_METHODS + ["gausslegendre"] @@ -26,12 +29,15 @@ class Riemann(Enum): def approximation_parameters( method: str, ) -> Tuple[Callable[[int], List[float]], Callable[[int], List[float]]]: - """Retrieves parameters for the input approximation `method` + """ + Retrieves parameters for the input approximation `method`. Parameters ---------- method - The name of the approximation method. Currently only `riemann` and gauss legendre are + The name of the approximation method. Currently supported only: ``'riemann_*'`` and ``'gausslegendre``'. + Check :py:data:`alibi.utils.approximation_methods.SUPPORTED_RIEMANN_METHODS` for all ``'riemann_*'`` possible + values. """ if method in SUPPORTED_RIEMANN_METHODS: return riemann_builders(method=Riemann[method.split("_")[-1]]) @@ -43,27 +49,25 @@ def approximation_parameters( def riemann_builders( method: Riemann = Riemann.trapezoid, ) -> Tuple[Callable[[int], List[float]], Callable[[int], List[float]]]: - """Step sizes are identical and alphas are scaled in [0, 1] + """ + Step sizes are identical and alphas are scaled in [0, 1]. Parameters ---------- n - The number of integration steps + The number of integration steps. method - `left`, `right`, `middle` and `trapezoid` riemann + Riemann method: ``Riemann.left`` | ``Riemann.right`` | ``Riemann.middle`` | ``Riemann.trapezoid``. 
Returns ------- - 2-element tuple of **step_sizes**, **alphas**: - - **step_sizes** (*callable*): - `step_sizes` takes the number of steps as an - input argument and returns an array of steps sizes which - sum is smaller than or equal to one. + 2-element tuple consisting of + + - `step_sizes` : ``Callable`` - `step_sizes` takes the number of steps as an input argument and returns an \ + array of steps sizes which sum is smaller than or equal to one. - - **alphas** (*callable*): - `alphas` takes the number of steps as an input argument - and returns the multipliers/coefficients for the inputs - of integrand in the range of [0, 1] + - `alphas` : ``Callable`` - `alphas` takes the number of steps as an input argument and returns the \ + multipliers/coefficients for the inputs of integrand in the range of [0, 1]. """ @@ -100,10 +104,10 @@ def alphas(n: int) -> List[float]: def gauss_legendre_builders() -> Tuple[ Callable[[int], List[float]], Callable[[int], List[float]] ]: - """ Numpy's `np.polynomial.legendre` function helps to compute step sizes - and alpha coefficients using gauss-legendre quadrature rule. - Since numpy returns the integration parameters in different scales we need to - rescale them to adjust to the desired scale. + """ + `np.polynomial.legendre` function helps to compute step sizes and alpha coefficients using gauss-legendre + quadrature rule. Since `numpy` returns the integration parameters in different scales we need to rescale them to + adjust to the desired scale. Gauss Legendre quadrature rule for approximating the integrals was originally proposed by [Xue Feng and her intern Hauroun Habeeb] @@ -112,20 +116,17 @@ def gauss_legendre_builders() -> Tuple[ Parameters ---------- n - The number of integration steps + The number of integration steps. Returns ------- - 2-element tuple of **step_sizes**, **alphas**: - - **step_sizes** (*callable*): - `step_sizes` takes the number of steps as an - input argument and returns an array of steps sizes which - sum is smaller than or equal to one. - - - **alphas** (*callable*): - `alphas` takes the number of steps as an input argument - and returns the multipliers/coefficients for the inputs - of integrand in the range of [0, 1] + 2-element tuple consisting of + + - `step_sizes` : ``Callable`` - `step_sizes` takes the number of steps as an input argument and returns an \ + array of steps sizes which sum is smaller than or equal to one. + + - `alphas` : ``Callable`` - `alphas` takes the number of steps as an input argument and returns the \ + multipliers/coefficients for the inputs of integrand in the range of [0, 1]. """ diff --git a/alibi/utils/data.py b/alibi/utils/data.py index 383d0187a..b6a3d2aa4 100644 --- a/alibi/utils/data.py +++ b/alibi/utils/data.py @@ -9,7 +9,7 @@ class Bunch(dict): """ - Container object for internal datasets + Container object for internal datasets. Dictionary-like object that exposes its keys as attributes. """ @@ -36,10 +36,10 @@ def gen_category_map(data: Union[pd.DataFrame, np.ndarray], Parameters ---------- data - 2-dimensional pandas dataframe or numpy array. + 2-dimensional `pandas` dataframe or `numpy` array. categorical_columns - A list of columns indicating categorical variables. Optional if passing a pandas dataframe as inference will - be used based on dtype 'O'. If passing a numpy array this is compulsory. + A list of columns indicating categorical variables. Optional if passing a `pandas` dataframe as inference + will be used based on dtype ``'O'``. 
If passing a `numpy` array this is compulsory. Returns ------- diff --git a/alibi/utils/discretizer.py b/alibi/utils/discretizer.py index 9e1582aee..d1ce8b0e2 100644 --- a/alibi/utils/discretizer.py +++ b/alibi/utils/discretizer.py @@ -10,18 +10,18 @@ class Discretizer(object): def __init__(self, data: np.ndarray, numerical_features: List[int], feature_names: List[str], percentiles: Sequence[Union[int, float]] = (25, 50, 75)) -> None: """ - Initialize the discretizer + Initialize the discretizer. Parameters ---------- data - Data to discretize + Data to discretize. numerical_features List of indices corresponding to the continuous feature columns. Only these features will be discretized. feature_names - List with feature names + List with feature names. percentiles - Percentiles used for discretization + Percentiles used for discretization. """ self.to_discretize = numerical_features @@ -49,19 +49,20 @@ def __init__(self, data: np.ndarray, numerical_features: List[int], feature_name def get_percentiles(x: np.ndarray, qts: np.ndarray) -> np.ndarray: """ Discretizes the the data in `x` using the quantiles in `qts`. - This is achieved by searching for the index of each value in x + This is achieved by searching for the index of each value in `x` into `qts`, which is assumed to be a 1-D sorted array. Parameters ---------- x - A tensor of data to be discretized + A `numpy` array of data to be discretized qts: - A percentiles array. This should be a 1-D array sorted in + A `numpy` array of percentiles. This should be a 1-D array sorted in ascending order. + Returns ------- - A discretized data tensor. + A discretized data `numpy` array. """ if len(qts.shape) != 1: @@ -94,7 +95,7 @@ def discretize(self, data: np.ndarray) -> np.ndarray: Parameters ---------- data - Data to discretize + Data to discretize. Returns ------- diff --git a/alibi/utils/distance.py b/alibi/utils/distance.py index 0a970f2f5..3ba457d33 100644 --- a/alibi/utils/distance.py +++ b/alibi/utils/distance.py @@ -7,18 +7,18 @@ def cityblock_batch(X: np.ndarray, y: np.ndarray) -> np.ndarray: """ - Calculate the L1 distances between a batch of arrays X and an array of the same shape y. + Calculate the L1 distances between a batch of arrays `X` and an array of the same shape `y`. Parameters ---------- X - Batch of arrays to calculate the distances from + Batch of arrays to calculate the distances from. y - Array to calculate the distance to + Array to calculate the distance to. Returns ------- - Array of distances from each array in X to y + Array of distances from each array in `X` to `y`. """ X_dim = len(X.shape) @@ -178,20 +178,19 @@ def multidim_scaling(d_pair: dict, Dict with as keys the column index of the categorical variables and as values a pairwise distance matrix for the categories of the variable. feature_range - Tuple with min and max ranges to allow for perturbed instances. Min and max ranges can be floats or - numpy arrays with dimension (1 x nb of features) for feature-wise ranges. + Tuple with `min` and `max` ranges to allow for perturbed instances. `Min` and `max` ranges can be `float` or + `numpy` arrays with dimension (`1 x nb of features`) for feature-wise ranges. n_components Number of dimensions in which to immerse the dissimilarities. use_metric - If True, perform metric MDS; otherwise, perform nonmetric MDS. + If ``True``, perform metric MDS; otherwise, perform nonmetric MDS. standardize_cat_vars - Standardize numerical values of categorical variables if True. 
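To illustrate the quantile lookup described for `Discretizer.get_percentiles` above, here is a plain `numpy` sketch (the library's exact boundary handling may differ)::

    import numpy as np

    x = np.array([1.0, 4.5, 7.2, 9.9])
    qts = np.percentile(x, (25, 50, 75))  # 1-D sorted array of percentile values
    bins = np.searchsorted(qts, x)        # index of each value relative to the quantiles
    print(bins)                           # [0 1 2 3]: one bin id per continuous value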
- + Standardize numerical values of categorical variables if ``True``. smooth - Smoothing exponent between 0 and 1 for the distances. Lower values of l will smooth the difference in + Smoothing exponent between 0 and 1 for the distances. Lower values than 1 will smooth the difference in distance metric between different features. center - Whether to center the scaled distance measures. If False, the min distance for each feature + Whether to center the scaled distance measures. If ``False``, the min distance for each feature except for the feature with the highest raw max distance will be the lower bound of the feature range, but the upper bound will be below the max feature range. update_feature_range diff --git a/alibi/utils/distributed.py b/alibi/utils/distributed.py index d7602c211..9c5f826c9 100644 --- a/alibi/utils/distributed.py +++ b/alibi/utils/distributed.py @@ -11,11 +11,11 @@ def check_ray() -> bool: """ - Checks if ray is installed. + Checks if `ray` is installed. Returns ------- - A bool indicating whether ray is installed or not. + A `bool` indicating whether `ray` is installed or not. """ import importlib @@ -38,17 +38,22 @@ class ActorPool(object): def __init__(self, actors): """ - Taken fom the ray repository: https://github.com/ray-project/ray/pull/5945 - Create an Actor pool from a list of existing actors. - An actor pool is a utility class similar to multiprocessing.Pool that - lets you schedule Ray tasks over a fixed pool of actors. - Arguments: - actors (list): List of Ray actor handles to use in this pool. - Examples: - >>> a1, a2 = Actor.remote(), Actor.remote() - >>> pool = ActorPool([a1, a2]) - >>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])) - [2, 4, 6, 8] + Taken fom the `ray` repository: https://github.com/ray-project/ray/pull/5945 . + Create an actor pool from a list of existing actors. + An actor pool is a utility class similar to `multiprocessing.Pool` that + lets you schedule `ray` tasks over a fixed pool of actors. + + Parameters + ---------- + actors + List of `ray` actor handles to use in this pool. + + Examples + --------- + >>> a1, a2 = Actor.remote(), Actor.remote() + >>> pool = ActorPool([a1, a2]) + >>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])) + [2, 4, 6, 8] """ self._idle_actors = list(actors) self._future_to_actor = {} @@ -58,29 +63,29 @@ def __init__(self, actors): self._pending_submits = [] def map(self, fn, values, chunksize=1): - """Apply the given function in parallel over the actors and values. This returns an ordered iterator that will - return results of the map as they finish. Note that you must iterate over the iterator to force the computation - to finish. + """Apply the given function in parallel over the `actors` and `values`. This returns an ordered iterator + that will return results of the map as they finish. Note that you must iterate over the iterator to force + the computation to finish. Parameters ---------- - fn (func) - Function that takes (actor, value) as argument and returns an ObjectID computing the result over the value. - The actor will be considered busy until the ObjectID completes. - values (list) - List of values that fn(actor, value) should be applied to. - chunksize (int) - splits the list of values to be submitted to the parallel process into sublists of size chunksize or less + fn : Callable + Function that takes `(actor, value)` as argument and returns an `ObjectID` computing the result over + the `value`. 
The `actor` will be considered busy until the `ObjectID` completes. + values : list + List of values that `fn(actor, value)` should be applied to. + chunksize : int + Splits the list of values to be submitted to the parallel process into sublists of size chunksize or less. Returns ------- - Iterator over results from applying fn to the actors and values. + Iterator over results from applying `fn` to the `actors` and `values`. Examples -------- - >>> pool = ActorPool(...) - >>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])) - [2, 4, 6, 8] + >>> pool = ActorPool(...) + >>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])) + [2, 4, 6, 8] """ values = self._chunk(values, chunksize=chunksize) @@ -91,29 +96,31 @@ def map(self, fn, values, chunksize=1): yield self.get_next() def map_unordered(self, fn, values, chunksize=1): - """Similar to map(), but returning an unordered iterator. This returns an unordered iterator that will - return results of the map as they finish. This can be more efficient that map() if some results take longer to - compute than others. + """ + Similar to :py:meth:`alibi.utils.distributed.ActorPool.map`, but returning an unordered iterator. + This returns an unordered iterator that will return results of the map as they finish. This can be more + efficient that :py:meth:`alibi.utils.distributed.ActorPool.map` if some results take longer to compute + than others. Parameters ---------- - fn (func) - Function that takes (actor, value) as argument and returns an ObjectID computing the result over the value. - The actor will be considered busy until the ObjectID completes. - values (list) - List of values that fn(actor, value) should be applied to. - chunksize (int) - splits the list of values to be submitted to the parallel process into sublists of size chunksize or less + fn : Callable + Function that takes `(actor, value)` as argument and returns an `ObjectID` computing the result over + the `value`. The `actor` will be considered busy until the `ObjectID` completes. + values : list + List of values that `fn(actor, value)` should be applied to. + chunksize : int + Splits the list of values to be submitted to the parallel process into sublists of size chunksize or less. Returns ------- - Iterator over results from applying fn to the actors and values. + Iterator over results from applying `fn` to the `actors` and `values`. Examples -------- - >>> pool = ActorPool(...) - >>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])) - [6, 2, 4, 8] + >>> pool = ActorPool(...) + >>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])) + [6, 2, 4, 8] """ values = self._chunk(values, chunksize=chunksize) @@ -123,24 +130,28 @@ def map_unordered(self, fn, values, chunksize=1): while self.has_next(): yield self.get_next_unordered() - def submit(self, fn, value): - """Schedule a single task to run in the pool. This has the same argument semantics as map(), but takes on a - single value instead of a list of values. The result can be retrieved using get_next() / get_next_unordered(). + def submit(self, fn: Callable, value: object): + """ + Schedule a single task to run in the pool. This has the same argument semantics as + :py:meth:`alibi.utils.distributed.ActorPool.map`, but takes on a single value instead of a list of values. + The result can be retrieved using :py:meth:`alibi.utils.distributed.ActorPool.get_next()` / + :py:meth:`alibi.utils.distributed.ActorPool.get_next_unordered()`. 
Parameters ---------- - fn (func) - Function that takes (actor, value) as argument and returns an ObjectID computing the result over the value. - The actor will be considered busy until the ObjectID completes. - value (object): Value to compute a result for. + fn + Function that takes `(actor, value)` as argument and returns an `ObjectID` computing the result over + the `value`. The `actor` will be considered busy until the `ObjectID` completes. + value + Value to compute a result for. Examples -------- - >>> pool = ActorPool(...) - >>> pool.submit(lambda a, v: a.double.remote(v), 1) - >>> pool.submit(lambda a, v: a.double.remote(v), 2) - >>> print(pool.get_next(), pool.get_next()) - 2, 4 + >>> pool = ActorPool(...) + >>> pool.submit(lambda a, v: a.double.remote(v), 1) + >>> pool.submit(lambda a, v: a.double.remote(v), 2) + >>> print(pool.get_next(), pool.get_next()) + 2, 4 """ if self._idle_actors: actor = self._idle_actors.pop() @@ -152,38 +163,47 @@ def submit(self, fn, value): self._pending_submits.append((fn, value)) def has_next(self): - """Returns whether there are any pending results to return. + """ + Returns whether there are any pending results to return. Returns ------- - `True` if there are any pending results not yet returned. + ``True`` if there are any pending results not yet returned. Examples -------- - >>> pool = ActorPool(...) - >>> pool.submit(lambda a, v: a.double.remote(v), 1) - >>> print(pool.has_next()) - True - >>> print(pool.get_next()) - 2 - >>> print(pool.has_next()) - False + >>> pool = ActorPool(...) + >>> pool.submit(lambda a, v: a.double.remote(v), 1) + >>> print(pool.has_next()) + True + >>> print(pool.get_next()) + 2 + >>> print(pool.has_next()) + False """ return bool(self._future_to_actor) def get_next(self, timeout=None): - """Returns the next pending result in order. - This returns the next result produced by submit(), blocking for up to - the specified timeout until it is available. - Returns: - The next result. - Raises: - TimeoutError if the timeout is reached. - Examples: - >>> pool = ActorPool(...) - >>> pool.submit(lambda a, v: a.double.remote(v), 1) - >>> print(pool.get_next()) - 2 + """ + Returns the next pending result in order. + This returns the next result produced by :py:meth:`alibi.utils.distributed.ActorPool.submit`, blocking + for up to the specified timeout until it is available. + + Returns + ------- + The next result. + + Raises + ------ + TimeoutError + If the timeout is reached. + + Examples + --------- + >>> pool = ActorPool(...) + >>> pool.submit(lambda a, v: a.double.remote(v), 1) + >>> print(pool.get_next()) + 2 """ if not self.has_next(): raise StopIteration("No more results to get") @@ -202,23 +222,29 @@ def get_next(self, timeout=None): return self.ray.get(future) def get_next_unordered(self, timeout=None): - """Returns any of the next pending results. - This returns some result produced by submit(), blocking for up to - the specified timeout until it is available. Unlike get_next(), the - results are not always returned in same order as submitted, which can - improve performance. - Returns: - The next result. - Raises: - TimeoutError if the timeout is reached. - Examples: - >>> pool = ActorPool(...) - >>> pool.submit(lambda a, v: a.double.remote(v), 1) - >>> pool.submit(lambda a, v: a.double.remote(v), 2) - >>> print(pool.get_next_unordered()) - 4 - >>> print(pool.get_next_unordered()) - 2 + """ + Returns any of the next pending results. 
+ This returns some result produced by :py:meth:`alibi.utils.distributed.ActorPool.submit()`, blocking for up to + the specified timeout until it is available. Unlike :py:meth:`alibi.utils.distributed.ActorPool.get_next()`, + the results are not always returned in same order as submitted, which can improve performance. + + Returns + ------- + The next result. + + Raises + ------ + `TimeoutError` if the timeout is reached. + + Examples + -------- + >>> pool = ActorPool(...) + >>> pool.submit(lambda a, v: a.double.remote(v), 1) + >>> pool.submit(lambda a, v: a.double.remote(v), 2) + >>> print(pool.get_next_unordered()) + 4 + >>> print(pool.get_next_unordered()) + 2 """ if not self.has_next(): raise StopIteration("No more results to get") @@ -255,20 +281,20 @@ def batch(X: np.ndarray, batch_size: Optional[int] = None, n_batches: int = 4) - X Array to be split. batch_size - The size of each batch. In particular: + The size of each batch. In particular - - if `batch_size` is not `None`, batches of this size are created. The sizes of the batches created might \ - vary if the 0-th dimension of `X` is not divisible by `batch_size`. For an array of length `l` that should \ - be split into `n` sections, it returns `l % n` sub-arrays of size `l//n + 1` and the rest of `size l//n` - - - if `batch_size` is `None`, then `X` is split into `n_batches` sub-arrays. + - if `batch_size` is not ``None``, batches of this size are created. The sizes of the batches created might \ + vary if the 0-th dimension of `X` is not divisible by `batch_size`. For an array of length `l` that should \ + be split into `n` sections, it returns `l % n` sub-arrays of size `l//n + 1` and the rest of `size l//n` + + - if `batch_size` is ``None``, then `X` is split into `n_batches` sub-arrays. n_batches - Number of batches in which to split the sub-array. Only used if `batch_size = None` + Number of batches in which to split the sub-array. Only used if ``batch_size = None`` Returns ------ - A list of sub-arrays of X. + A list of sub-arrays of `X`. """ # noqa W605 n_records = X.shape[0] @@ -296,10 +322,10 @@ def default_target_fcn(actor: Any, instances: tuple, kwargs: Optional[Dict] = No Parameters ---------- actor - A `ray` actor. This is typically a class decorated with the @ray.remote decorator, that has been subsequently + A `ray` actor. This is typically a class decorated with the `@ray.remote decorator`, that has been subsequently instantiated using ``cls.remote(*args, **kwargs)``. instances - A (batch_index, batch) tuple containing the batch of instances to be explained along with a batch index. + A `(batch_index, batch)` tuple containing the batch of instances to be explained along with a batch index. kwargs A list of keyword arguments for the actor `get_explanation` method. @@ -311,7 +337,7 @@ def default_target_fcn(actor: Any, instances: tuple, kwargs: Optional[Dict] = No ----- This function can be customized (e.g., if one does not desire to wrap the explainer such that it has `get_explanation` method. The customized function should be called `*_target_fcn` with the wildcard being replaced - by the name of the explanation method (e.g., cem, cfproto, etc). The same name should be added to the + by the name of the explanation method (e.g., `cem`, `cfproto`, etc). The same name should be added to the `distributed_opts` dictionary passed by the user prior to instantiating the `DistributedExplainer`. 
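The splitting rule documented for `batch` above matches `numpy.array_split`; for a hypothetical array of 10 rows split into 4 sections::

    import numpy as np

    X = np.arange(20).reshape(10, 2)
    parts = np.array_split(X, 4)        # 10 % 4 = 2 sub-arrays of size 3, the rest of size 2
    print([p.shape[0] for p in parts])  # [3, 3, 2, 2]

A call such as ``batch(X, n_batches=4)`` (or ``batch(X, batch_size=3)``, which implies four mini-batches here) is expected to produce the same sizes.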
""" @@ -333,12 +359,13 @@ def concatenate_minibatches(minibatch_results: Union[List[np.ndarray], List[List ---------- minibatch_results Explanations for each minibatch. + Returns ------- - If the input is ``List[np.ndarray]``, a single numpy array obtained by concatenating `minibatch` results along - the 0th axis. \ - If the input is ``List[List[np.ndarray]]`` A list of numpy arrays obtained by concatenating arrays in with the - same position in the sublists along the 0th axis. + If the input is ``List[np.ndarray]``, a single `numpy` array obtained by concatenating `minibatch` results along \ + the 0th axis. + If the input is ``List[List[np.ndarray]]`` A list of `numpy` arrays obtained by concatenating arrays in with the \ + same position in the sublists along the 0th axis. """ # noqa W605 if isinstance(minibatch_results[0], np.ndarray): @@ -369,7 +396,7 @@ def invert_permutation(p: list) -> np.ndarray: Parameters ----------- p - Some permutation of 0, 1, ..., len(p)-1. Returns an array s, where s[i] gives the index of i in p. + Some permutation of `0, 1, ..., len(p)-1`. Returns an array `s`, where `s[i]` gives the index of `i` in `p`. Returns ------- @@ -396,10 +423,9 @@ def order_result(unordered_result: Generator[Tuple[int, Any], None, None]) -> Li ------- A list with re-ordered results. - Notes ----- - This should not be used if one wants to take advantage of the results being returned as they are calculated. + This should not be used if one wants to take advantage of the results being returned as they are calculated. """ result_order, results = list(zip(*[(idx, res) for idx, res in unordered_result])) @@ -419,7 +445,7 @@ class DistributedExplainer: """ if RAY_INSTALLED: import ray - ray = ray + ray = ray #: `ray` module. concatenate: Callable @@ -437,7 +463,6 @@ def __init__(self, Parameters ---------- - concatenate_results distributed_opts A dictionary with the following type (minimal signature):: @@ -447,19 +472,19 @@ class DistributedOpts(TypedDict): The dictionary may contain two additional keys: - - ``'actor_cpu_frac'`` (float, <= 1.0, >0.0): This is used to create more than one process on one \ - CPU/GPU. This may not speed up CPU intensive tasks but it is worth experimenting with when few physical \ - cores are available. In particular, this is highly useful when the user wants to share a GPU for \ - multiple tasks, with the caviat that the machine learning framework itself needs to support running \ - multiple replicas on the same GPU. See the ``ray`` documentation `here_` for details. + - ``'actor_cpu_frac'`` : ``(float, <= 1.0, >0.0)`` - This is used to create more than one process \ + on one CPU/GPU. This may not speed up CPU intensive tasks but it is worth experimenting with when \ + few physical cores are available. In particular, this is highly useful when the user wants to share \ + a GPU for multiple tasks, with the caviat that the machine learning framework itself needs to \ + support running multiple replicas on the same GPU. See the `ray` documentation `here_` for details. .. _here: https://docs.ray.io/en/stable/resources.html#fractional-resources - - ``'algorithm'``: this is specified internally by the caller. It is used in order to register target \ - function callbacks for the parallel pool These should be implemented in the global scope. \ - If not specified, its value will be ``'default'``, which will select a default target function which \ - expects the actor has a `get_explanation` method. 
+ - ``'algorithm'`` : ``str`` - this is specified internally by the caller. It is used in order to \ + register target function callbacks for the parallel pool These should be implemented in the global \ + scope. If not specified, its value will be ``'default'``, which will select a default target function \ + which expects the actor has a `get_explanation` method. explainer_type Explainer class. @@ -467,13 +492,17 @@ class DistributedOpts(TypedDict): Positional arguments to explainer constructor. explainer_init_kwargs Keyword arguments to explainer constructor. + concatenate_results + If ``True`` concatenates the results. See :py:func:`alibi.utils.distributed.concatenate_minibatches` for + more details. return_generator - If `True` a generator that returns the results in the order the computation finishes is returned when - `get_explanation` is called. Otherwise, the order of the results is the same as the order of the minibatches. + If ``True`` a generator that returns the results in the order the computation finishes is returned when + `get_explanation` is called. Otherwise, the order of the results is the same as the order of the + minibatches. Notes ----- - When `return_generator=True`, the caller has to take elements from the generator (e.g., by calling `next`) in + When ``return_generator=True``, the caller has to take elements from the generator (e.g., by calling `next`) in order to start computing the results (because the `ray` pool is implemented as a generator). """ # noqa W605 @@ -485,7 +514,7 @@ class DistributedOpts(TypedDict): self.return_generator = return_generator self.concatenate_results = concatenate_results algorithm = distributed_opts.get('algorithm', 'default') - if 'algorithm' == 'default': + if algorithm == 'default': logger.warning( "No algorithm specified in distributed option, default target function will be selected." ) @@ -522,7 +551,7 @@ def __getattr__(self, item: str) -> Any: Returns ------- - The value of the attribute specified by `item`. + The value of the attribute specified by `item`. Raises ------ @@ -532,11 +561,11 @@ def __getattr__(self, item: str) -> Any: Notes ----- 1. This method assumes that the actor implements a `return_attribute` method. - 2. Note that we are indexing the idle actors. This means that if a pool was initialised with 5 actors and 3 \ - are busy, indexing with index 2 will raise an IndexError. - 3. The order of _idle_actors constantly changes - an actor is removed from it if there is a task to execute \ - and appended back when the task is complete. Therefore, indexing at the same position as computation \ - proceeds will result in retrieving state from different processes. + 2. Note that we are indexing the idle actors. This means that if a pool was initialised with 5 actors \ + and 3 are busy, indexing with index 2 will raise an `IndexError`. + 3. The order of `_idle_actors` constantly changes - an actor is removed from it if there is a task to \ + execute and appended back when the task is complete. Therefore, indexing at the same position as \ + computation proceeds will result in retrieving state from different processes. """ # noqa W605 @@ -562,7 +591,7 @@ def actor_index(self, value: int): def set_actor_index(self, value: int): """ - Sets actor index. This is used when the DistributedExplainer is in a separate process because `ray` does not + Sets actor index. 
This is used when the `DistributedExplainer` is in a separate process because `ray` does not support calling property setters remotely """ self._actor_index = value @@ -602,22 +631,22 @@ def get_explanation(self, X: np.ndarray, **kwargs) -> \ ---------- X A batch of instances to be explained. Split into batches according to the settings passed to the constructor. - kwargs + **kwargs Any keyword-arguments for the explainer `explain` method. Returns -------- - The explanations are returned as: + The explanations are returned as + + - a generator, if the `return_generator` option is specified. This is used so that the caller can access \ + the results as they are computed. This is the only case when this method is non-blocking and the caller \ + needs to call `next` on the generator to trigger the parallel computation. - - a generator, if the `return_generator` option is specified. This is used so that the caller can access \ - the results as they are computed. This is the only case when this method is non-blocking and the caller \ - needs to call `next` on the generator to trigger the parallel computation - - - a list of objects, whose type depends on the return type of the explainer. This is returned if no \ - custom preprocessing function is specified - - - an object, whose type depends on the return type of the concatenation function return when called with \ - a list of minibatch results with the same order as the minibatches + - a list of objects, whose type depends on the return type of the explainer. This is returned if no \ + custom preprocessing function is specified. + + - an object, whose type depends on the return type of the concatenation function return when called with \ + a list of minibatch results with the same order as the minibatches. """ # noqa E501 @@ -641,13 +670,13 @@ def get_explanation(self, X: np.ndarray, **kwargs) -> \ class PoolCollection: """ - A wrapper object that turns a DistributedExplainer into a remote actor. This allows running multiple distributed + A wrapper object that turns a `DistributedExplainer` into a remote actor. This allows running multiple distributed explainers in parallel. """ if RAY_INSTALLED: import ray - ray = ray + ray = ray #: `ray` module. def __init__(self, distributed_opts: Dict[str, Any], @@ -663,11 +692,11 @@ def __init__(self, Parameters ---------- distributed_opts , explainer_type, explainer_init_args, explainer_init_kwargs - See DistributedExplainer constructor documentation for explanations. Each entry in the list is a - different explainer configuration (e.g., CEM in PN vs PP mode, different background dataset sizes for SHAP, - etc). - kwargs - Any other kwargs, passed to the DistributedExplainer objects. + See :py:meth:`alibi.utils.distributed.DistributedExplainer` constructor documentation for explanations. + Each entry in the list is a different explainer configuration (e.g., CEM in PN vs PP mode, different + background dataset sizes for SHAP, etc). + **kwargs + Any other kwargs, passed to the `DistributedExplainer` objects. Raises ------ @@ -755,13 +784,13 @@ def create_explainer_handles(distributed_opts: Dict[str, Any], explainer_init_kwargs: List[Dict], **kwargs): """ - Creates multiple actors for DistributedExplainer so that tasks can be executed in parallel. The actors are + Creates multiple actors for `DistributedExplainer` so that tasks can be executed in parallel. The actors are initialised with different arguments, so they represent different explainers. 
Parameters ---------- - distributed_opts, explainer_type, explainer_init_args, explainer_init_kwargs, kwargs - See constructor. + distributed_opts, explainer_type, explainer_init_args, explainer_init_kwargs, **kwargs + See :py:meth:`alibi.utils.distributed.PoolCollection`. """ explainer_handles = [PoolCollection.ray.remote(DistributedExplainer) for _ in range(len(explainer_init_args))] @@ -798,8 +827,8 @@ def get_explanation(self, X, **kwargs) -> List: Raises ------ TypeError - If the user sets `return_generator=True` for the DistributedExplainer. This is because generators cannot be - pickled so one cannot call `ray.get`. + If the user sets ``return_generator=True`` for the DistributedExplainer. This is because generators + cannot be pickled so one cannot call `ray.get`. """ diff --git a/alibi/utils/distributions.py b/alibi/utils/distributions.py index e102c4906..eb9c23b1d 100644 --- a/alibi/utils/distributions.py +++ b/alibi/utils/distributions.py @@ -3,7 +3,7 @@ def kl_bernoulli(p: np.ndarray, q: np.ndarray) -> np.ndarray: """ - Compute KL-divergence between 2 probabilities p and q. len(p) divergences are calculated + Compute KL-divergence between 2 probabilities `p` and `q`. `len(p)` divergences are calculated simultaneously. Parameters @@ -15,7 +15,7 @@ def kl_bernoulli(p: np.ndarray, q: np.ndarray) -> np.ndarray: Returns ------- - Array with the KL-divergence between p and q. + Array with the KL-divergence between `p` and `q`. """ m = np.clip(p, 0.0000001, 0.9999999999999999).astype(float) diff --git a/alibi/utils/download.py b/alibi/utils/download.py index 4f3085d64..d3b4809ec 100644 --- a/alibi/utils/download.py +++ b/alibi/utils/download.py @@ -5,12 +5,12 @@ def spacy_model(model: str = 'en_core_web_md') -> None: """ - Download spaCy model. + Download `spaCy` model. Parameters ---------- model - Model to be downloaded + Model to be downloaded. """ try: spacy.load(model) diff --git a/alibi/utils/gradients.py b/alibi/utils/gradients.py index 32aa6a9c2..4f8b151a4 100644 --- a/alibi/utils/gradients.py +++ b/alibi/utils/gradients.py @@ -11,11 +11,11 @@ def perturb(X: np.ndarray, Parameters ---------- X - Array to be perturbed + Array to be perturbed. eps - Size of perturbation + Size of perturbation. proba - If True, the net effect of the perturbation needs to be 0 to keep the sum of the probabilities equal to 1 + If ``True``, the net effect of the perturbation needs to be 0 to keep the sum of the probabilities equal to 1. Returns ------- @@ -43,22 +43,22 @@ def num_grad_batch(func: Callable, eps: Union[float, np.ndarray] = 1e-08) -> np.ndarray: """ Calculate the numerical gradients of a vector-valued function (typically a prediction function in classification) - with respect to a batch of arrays X. + with respect to a batch of arrays `X`. Parameters ---------- func - Function to be differentiated + Function to be differentiated. X - A batch of vectors at which to evaluate the gradient of the function + A batch of vectors at which to evaluate the gradient of the function. args - Any additional arguments to pass to the function + Any additional arguments to pass to the function. eps - Gradient step to use in the numerical calculation, can be a single float or one for each feature + Gradient step to use in the numerical calculation, can be a single `float` or one for each feature. Returns ------- - An array of gradients at each point in the batch X + An array of gradients at each point in the batch `X`. 
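To make the behaviour documented for `num_grad_batch` concrete, here is a naive central-difference sketch of the same idea for a scalar-valued function; it is illustrative only and not the library's vectorised implementation:

    import numpy as np

    def central_diff_grad(func, X, eps=1e-4):
        # Perturb each feature of each instance by +/- eps and take the symmetric difference.
        grads = np.zeros_like(X, dtype=float)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                up, down = X.copy(), X.copy()
                up[i, j] += eps
                down[i, j] -= eps
                grads[i, j] = (func(up[i:i + 1]) - func(down[i:i + 1])) / (2 * eps)
        return grads

    def f(x):
        return np.sum(x ** 2)  # gradient of f is 2 * x

    X = np.array([[1.0, -2.0], [0.5, 3.0]])
    print(central_diff_grad(f, X))  # approximately 2 * X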
""" # N = gradient batch size; F = nb of features in X, P = nb of prediction classes, B = instance batch size diff --git a/alibi/utils/lang_model.py b/alibi/utils/lang_model.py index f01ffe834..fac5ce69f 100644 --- a/alibi/utils/lang_model.py +++ b/alibi/utils/lang_model.py @@ -3,9 +3,9 @@ strategy. The `LanguageModel` base class defines basic functionalities as loading, storing, and predicting. Language model's tokenizers usually work at a subword level, and thus, a word can be split into subwords. For example, -a word can be decomposed as: word = [head_token tail_token_1 tail_token_2 ... tail_token_k]. For language models -such as DistilbertBaseUncased and BertBaseUncased, the tail tokens can be identified by a special prefix `##`. -On the other hand, for RobertaBase only the head is prefixed with the special character `Ġ`, thus the tail tokens +a word can be decomposed as: ``word = [head_token tail_token_1 tail_token_2 ... tail_token_k]``. For language models +such as `DistilbertBaseUncased` and `BertBaseUncased`, the tail tokens can be identified by a special prefix ``'##'``. +On the other hand, for `RobertaBase` only the head is prefixed with the special character ``'Ġ'``, thus the tail tokens can be identified by the absence of the special token. In this module, we refer to a tail token as a subword prefix. We will use the notion of a subword to refer to either a `head` or a `tail` token. @@ -31,7 +31,7 @@ class LanguageModel(abc.ABC): - SUBWORD_PREFIX = '' + SUBWORD_PREFIX = '' #: Language model subword prefix. # We don't type transformers objects here as it would likely require digging into # some private base classes which may change in the future and cause breaking changes. @@ -50,8 +50,8 @@ def __init__(self, model_path: str, preloading: bool = True): model_path `transformers` package model path. preloading - Whether to preload the online version of the transformer. - If `False`, a call to `from_disk` method is expected. + Whether to preload the online version of the transformer. If ``False``, a call to `from_disk` + method is expected. """ self.model_path = model_path @@ -104,12 +104,12 @@ def to_disk(self, path: Union[str, Path]): def is_subword_prefix(self, token: str) -> bool: """ Checks if the given token is a part of the tail of a word. Note that a word can - be split in multiple tokens (e.g., word = [head_token tail_token_1 tail_token_2 ... tail_token_k]). + be split in multiple tokens (e.g., ``word = [head_token tail_token_1 tail_token_2 ... tail_token_k]``). Each language model has a convention on how to mark a tail token. For example - DistilbertBaseUncased and BertBaseUncased have the tail tokens prefixed with the special - set of characters `##`. On the other hand, for RobertaBase only the head token is prefixed - with the special character 'Ġ' and thus we need to check the absence of the prefix to identify - the tail tokens. We call those special characters SUBWORD_PREFIX. Due to different conventions, + `DistilbertBaseUncased` and `BertBaseUncased` have the tail tokens prefixed with the special + set of characters ``'##'``. On the other hand, for `RobertaBase` only the head token is prefixed + with the special character ``'Ġ'`` and thus we need to check the absence of the prefix to identify + the tail tokens. We call those special characters `SUBWORD_PREFIX`. Due to different conventions, this method has to be implemented for each language model. See module docstring for namings. 
Parameters @@ -119,7 +119,7 @@ def is_subword_prefix(self, token: str) -> bool: Returns ------- - True if the given token is a subword prefix. False otherwise. + ``True`` if the given token is a subword prefix. ``False`` otherwise. """ pass @@ -129,9 +129,10 @@ def select_word(self, punctuation: str) -> str: """ Given a tokenized text and the starting index of a word, the function selects the entire word. - Note that a word is composed of multiple tokens (e.g., word = [head_token tail_token_1 - tail_token_2 ... tail_token_k]. The tail tokens can be identified based on the - presence/absence of SUBWORD_PREFIX. See `is_subword_prefix` for more details. + Note that a word is composed of multiple tokens (e.g., ``word = [head_token tail_token_1 + tail_token_2 ... tail_token_k]``). The tail tokens can be identified based on the + presence/absence of `SUBWORD_PREFIX`. See :py:meth:`alibi.utils.lang_model.LanguageModel.is_subword_prefix` + for more details. Parameters ---------- @@ -145,7 +146,7 @@ def select_word(self, Returns ------- - The word obtained by concatenation [head_token tail_token_1 tail_token_2 ... tail_token_k]. + The word obtained by concatenation ``[head_token tail_token_1 tail_token_2 ... tail_token_k]``. """ # define the ending index end_idx = start_idx + 1 @@ -180,11 +181,11 @@ def is_stop_word(self, stopwords: List of stop words. The words in this list should be lowercase. punctuation - Punctuation to be considered. See `select_entire_word`. + Punctuation to be considered. See :py:meth:`alibi.utils.lang_model.LanguageModel.select_entire_word`. Returns ------- - True if the `token` is in the `stopwords` list. False otherwise. + ``True`` if the `token` is in the `stopwords` list. ``False`` otherwise. """ if not stopwords: return False @@ -208,7 +209,7 @@ def is_punctuation(self, token: str, punctuation: str) -> bool: Returns ------- - True if the `token` is a punctuation. False otherwise. + ``True`` if the `token` is a punctuation. ``False`` otherwise. """ token = token.replace(self.SUBWORD_PREFIX, '').strip() return all([c in punctuation for c in token]) @@ -289,7 +290,7 @@ def predict_batch_lm(self, vocab_size: int, batch_size: int) -> np.ndarray: """ - Tensorflow language model batch predictions for AnchorText. + `Tensorflow` language model batch predictions for `AnchorText`. Parameters ---------- @@ -331,12 +332,12 @@ class DistilbertBaseUncased(LanguageModel): def __init__(self, preloading: bool = True): """ - Initialize DistilbertBaseUncased. + Initialize `DistilbertBaseUncased`. Parameters ---------- preloading - See `LanguageModel` constructor. + See :py:meth:`alibi.utils.lang_model.LanguageModel.__init__`. """ super().__init__("distilbert-base-uncased", preloading) @@ -353,12 +354,12 @@ class BertBaseUncased(LanguageModel): def __init__(self, preloading: bool = True): """ - Initialize BertBaseUncased. + Initialize `BertBaseUncased`. Parameters ---------- preloading - See `LanguageModel` constructor. + See :py:meth:`alibi.utils.lang_model.LanguageModel.__init__`. """ super().__init__("bert-base-uncased", preloading) @@ -375,12 +376,12 @@ class RobertaBase(LanguageModel): def __init__(self, preloading: bool = True): """ - Initialize RobertaBase + Initialize `RobertaBase`. Parameters ---------- preloading - See `LanguageModel` constructor. + See :py:meth:`alibi.utils.lang_model.LanguageModel.__init__` constructor. 
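As a usage note for the wrapper classes documented above, construction is uniform. The sketch below assumes the `transformers` dependency is installed and that the pretrained weights can be downloaded or are already cached:

    from alibi.utils.lang_model import DistilbertBaseUncased, RobertaBase

    lm = DistilbertBaseUncased(preloading=True)  # loads the online transformers model at construction
    # With preloading=False the weights are not fetched up front; a subsequent call to the
    # `from_disk` method is expected before the model is used.
    lazy_lm = RobertaBase(preloading=False)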
""" super().__init__("roberta-base", preloading) diff --git a/alibi/utils/mapping.py b/alibi/utils/mapping.py index 9f997ba7c..a57b9f5b3 100644 --- a/alibi/utils/mapping.py +++ b/alibi/utils/mapping.py @@ -5,7 +5,7 @@ def ohe_to_ord_shape(shape: tuple, cat_vars: Dict[int, int], is_ohe: bool = False) -> tuple: """ - Infer shape of instance if the categorical variables have ordinal instead of on-hot encoding. + Infer shape of instance if the categorical variables have ordinal instead of one-hot encoding. Parameters ---------- @@ -38,14 +38,14 @@ def ord_to_num(data: np.ndarray, dist: dict) -> np.ndarray: Parameters ---------- data - Numpy array with the categorical data. + `Numpy` array with the categorical data. dist Dict with as keys the categorical variables and as values the numerical value for each category. Returns ------- - Numpy array with transformed categorical data into numerical values. + `Numpy` array with transformed categorical data into numerical values. """ rng = data.shape[0] X = data.astype(np.float32, copy=True) @@ -66,14 +66,14 @@ def num_to_ord(data: np.ndarray, dist: dict) -> np.ndarray: Parameters ---------- data - Numpy array with the numerical data. + `Numpy` array with the numerical data. dist Dict with as keys the categorical variables and as values the numerical value for each category. Returns ------- - Numpy array with transformed numerical data into categories. + `Numpy` array with transformed numerical data into categories. """ X = data.copy() for k, v in dist.items(): diff --git a/alibi/utils/visualization.py b/alibi/utils/visualization.py index 562d5ffd7..ba75c55d3 100644 --- a/alibi/utils/visualization.py +++ b/alibi/utils/visualization.py @@ -90,106 +90,80 @@ def visualize_image_attr( fig_size: Tuple[int, int] = (6, 6), use_pyplot: bool = True, ): - r""" - Visualizes attribution for a given image by normalizing attribution values - of the desired sign (positive, negative, absolute value, or all) and displaying - them using the desired mode in a matplotlib figure. - - Parameters - ---------- - - attr - Numpy array corresponding to attributions to be - visualized. Shape must be in the form (H, W, C), with - channels as last dimension. Shape must also match that of - the original image if provided. - original_image - Numpy array corresponding to - original image. Shape must be in the form (H, W, C), with - channels as the last dimension. Image can be provided either - with float values in range 0-1 or int values between 0-255. - This is a necessary argument for any visualization method - which utilizes the original image. - method - Chosen method for visualizing attribution. - Supported options are: - 1. `heat_map` - Display heat map of chosen attributions - 2. `blended_heat_map` - Overlay heat map over greyscale - version of original image. Parameter alpha_overlay - corresponds to alpha of heat map. - 3. `original_image` - Only display original image. - 4. `masked_image` - Mask image (pixel-wise multiply) - by normalized attribution values. - 5. `alpha_scaling` - Sets alpha channel of each pixel - to be equal to normalized attribution value. - Default: `heat_map` - sign - Chosen sign of attributions to visualize. Supported - options are: - 1. `positive` - Displays only positive pixel attributions. - 2. `absolute_value` - Displays absolute value of - attributions. - 3. `negative` - Displays only negative pixel attributions. - 4. `all` - Displays both positive and negative attribution - values. 
This is not supported for `masked_image` or - `alpha_scaling` modes, since signed information cannot - be represented in these modes. - - plt_fig_axis - Tuple of matplotlib.pyplot.figure and axis - on which to visualize. If None is provided, then a new figure - and axis are created. - - outlier_perc - Top attribution values which - correspond to a total of outlier_perc percentage of the - total attribution are set to 1 and scaling is performed - using the minimum of these values. For sign=`all`, outliers a - nd scale value are computed using absolute value of - attributions. - - cmap - String corresponding to desired colormap for - heatmap visualization. This defaults to "Reds" for negative - sign, "Blues" for absolute value, "Greens" for positive sign, - and a spectrum from red to green for all. Note that this - argument is only used for visualizations displaying heatmaps. - - alpha_overlay - Alpha to set for heatmap when using - `blended_heat_map` visualization mode, which overlays the - heat map over the greyscaled original image. - - show_colorbar - Displays colorbar for heatmap below - the visualization. If given method does not use a heatmap, - then a colormap axis is created and hidden. This is - necessary for appropriate alignment when visualizing - multiple plots, some with colorbars and some without. - - title - Title string for plot. If None, no title is set. - - fig_size - Size of figure created. - - use_pyplot - If true, uses pyplot to create and show - figure and displays the figure after creating. If False, - uses Matplotlib object oriented API and simply returns a - figure object without showing. - - Returns - ------- - 2-element tuple of **figure**, **axis**: - - **figure** (*matplotlib.pyplot.figure*): - Figure object on which visualization - is created. If plt_fig_axis argument is given, this is the - same figure provided. - - **axis** (*matplotlib.pyplot.axis*): - Axis object on which visualization - is created. If plt_fig_axis argument is given, this is the - same axis provided. + """ + Visualizes attribution for a given image by normalizing attribution values of the desired sign + (``'positive'`` | ``'negative'`` | ``'absolute_value'`` | ``'all'``) and displaying them using the desired mode + in a `matplotlib` figure. + + Parameters + ---------- + attr + `Numpy` array corresponding to attributions to be visualized. Shape must be in the form `(H, W, C)`, with + channels as the last dimension. Shape must also match that of the original image if provided. + original_image + `Numpy` array corresponding to original image. Shape must be in the form `(H, W, C)`, with channels as the + last dimension. Image can be provided either with `float` values in range 0-1 or `int` values between 0-255. + This is a necessary argument for any visualization method which utilizes the original image. + method + Chosen method for visualizing attribution. Supported options are: + + - ``'heat_map'`` - Display heat map of chosen attributions. + + - ``'blended_heat_map'`` - Overlay heat map over greyscale version of original image. Parameter `alpha_overlay` \ + corresponds to alpha of heat map. + + - ``'original_image'`` - Only display original image. + + - ``'masked_image'`` - Mask image (pixel-wise multiply) by normalized attribution values. + + - ``'alpha_scaling'`` - Sets alpha channel of each pixel to be equal to normalized attribution value. + + Default: ``'heat_map'``. + sign + Chosen sign of attributions to visualize.
Supported options are: + + - ``'positive'`` - Displays only positive pixel attributions. + + - ``'absolute_value'`` - Displays absolute value of attributions. + + - ``'negative'`` - Displays only negative pixel attributions. + + - ``'all'`` - Displays both positive and negative attribution values. This is not supported for + ``'masked_image'`` or ``'alpha_scaling'`` modes, since signed information cannot be represented in these modes. + plt_fig_axis + Tuple of `matplotlib.pyplot.figure` and `axis` on which to visualize. If ``None`` is provided, then a new + figure and axis are created. + outlier_perc + Top attribution values which correspond to a total of `outlier_perc` percentage of the total attribution are + set to 1 and scaling is performed using the minimum of these values. For ``sign='all'``, outliers and scale + value are computed using absolute value of attributions. + cmap + String corresponding to desired colormap for heatmap visualization. This defaults to ``'Reds'`` for negative + sign, ``'Blues'`` for absolute value, ``'Greens'`` for positive sign, and a spectrum from red to green for all. + Note that this argument is only used for visualizations displaying heatmaps. + alpha_overlay + Alpha to set for heatmap when using ``'blended_heat_map'`` visualization mode, which overlays the + heat map over the greyscaled original image. + show_colorbar + Displays colorbar for heatmap below the visualization. If the given method does not use a heatmap, + then a colormap axis is created and hidden. This is necessary for appropriate alignment when visualizing + multiple plots, some with colorbars and some without. + title + Title string for plot. If ``None``, no title is set. + fig_size + Size of figure created. + use_pyplot + If ``True``, uses `pyplot` to create and show the figure and displays the figure after creating it. If ``False``, + uses `matplotlib` object oriented API and simply returns a figure object without showing. + + Returns + ------- + 2-element tuple consisting of + - `figure` : ``matplotlib.pyplot.figure`` - Figure object on which visualization is created. If `plt_fig_axis` \ + argument is given, this is the same figure provided. + + - `axis` : ``matplotlib.pyplot.axis`` - Axis object on which visualization is created. If `plt_fig_axis` argument \ + is given, this is the same axis provided. """ # Create plot if figure, axis not provided diff --git a/alibi/utils/wrappers.py b/alibi/utils/wrappers.py index 0e3596766..9e49736ca 100644 --- a/alibi/utils/wrappers.py +++ b/alibi/utils/wrappers.py @@ -24,8 +24,8 @@ class ArgmaxTransformer: """ A transformer for converting classification output probability tensors to class labels. It assumes the predictor is a callable - that can be called with a N-tensor of data points `x` and produces - an N-tensor of outputs. + that can be called with an `N`-tensor of data points `x` and produces + an `N`-tensor of outputs. """ def __init__(self, predictor): @@ -45,8 +45,8 @@ def methdispatch(func): def wrapper(*args, **kw): return dispatch(args[0].__class__)(*args, **kw) - This uses singledispatch to do achieve this but instead uses args[1] - since args[0] will always be self. + This uses `singledispatch` to achieve this but instead uses `args[1]` + since `args[0]` will always be `self`.
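Since the `methdispatch` docstring above only sketches the wrapper, a self-contained example of the pattern it describes may help. This is an illustration of dispatching on the second argument rather than a copy of the library's code:

    from functools import singledispatch

    def methdispatch(func):
        # Like functools.singledispatch, but dispatch on args[1] because args[0] is always `self`.
        dispatcher = singledispatch(func)

        def wrapper(*args, **kw):
            return dispatcher.dispatch(args[1].__class__)(*args, **kw)

        wrapper.register = dispatcher.register
        return wrapper

    class Summariser:
        @methdispatch
        def describe(self, value):
            return f"generic: {value!r}"

        @describe.register(int)
        def _(self, value):
            return f"integer: {value}"

    s = Summariser()
    print(s.describe("text"))  # generic: 'text'
    print(s.describe(7))       # integer: 7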
""" dispatcher = singledispatch(func) diff --git a/doc/source/methods/ALE.ipynb b/doc/source/methods/ALE.ipynb index 6fd505f83..a9f57533b 100644 --- a/doc/source/methods/ALE.ipynb +++ b/doc/source/methods/ALE.ipynb @@ -64,13 +64,13 @@ "\n", "The result `exp` is an `Explanation` object which contains the following data-related attributes:\n", "\n", - " - `ale_values` - a list of arrays of ALE values (one for each feature). Each array can have multiple columns (if the number of targets is >1 as in classification)\n", - " - `constant_value` - the mean prediction over $X$ (zeroth order effects)\n", - " - `ale0` - a list of \"centering\" values (one for each feature) used by the algorithm to center the `ale_values` around the expected effect for the feature (i.e. the sum of `ale_values` and `ale0` will be the uncentered ALE)\n", - " - `feature_values` - a list of arrays (one for each feature) of feature values at which the ALE values were computed\n", - " - `feature_names` - a list of feature names\n", - " - `target_names` - a list of target names\n", - " - `feature_deciles` - a list of arrays (one for each feature) of the feature deciles\n", + " - `ale_values` - a list of arrays of ALE values (one for each feature). Each array can have multiple columns (if the number of targets is >1 as in classification).\n", + " - `constant_value` - the mean prediction over $X$ (zeroth order effects).\n", + " - `ale0` - a list of arrays of “centering” values (one for each feature) used by the algorithm to center the `ale_values` around the expected effect for the feature (i.e. the sum of `ale_values` and `ale0` will be the uncentered ALE).\n", + " - `feature_values` - a list of arrays (one for each feature) of feature values at which the ALE values were computed.\n", + " - `feature_names` - an array of feature names.\n", + " - `target_names` - an array of target names.\n", + " - `feature_deciles` - a list of arrays (one for each feature) of the feature deciles.\n", " \n", " \n", "Plotting `ale_values` against `feature_values` recovers the ALE curves. For convenience we include a plotting function `plot_ale` which automatically produces ALE plots using `matplotlib`:\n",