From 6ae963d284a75edc186724a05f90660df6633dd6 Mon Sep 17 00:00:00 2001
From: voetberg <magpie127@gmail.com>
Date: Thu, 6 Jun 2024 16:42:30 -0500
Subject: [PATCH 1/5] Run pre-lint, have pyproject rerun lock on push

---
 .github/workflows/test.yaml     |   3 +
 docs/source/conf.py             |  19 +-
 src/data/data.py                |   8 +-
 src/data/simulator.py           |   1 -
 src/metrics/local_two_sample.py | 194 ++++++++++++--------
 src/plots/cdf_ranks.py          |   5 +-
 src/plots/coverage_fraction.py  |   3 +-
 src/plots/local_two_sample.py   | 304 +++++++++++++++++++-------------
 src/plots/ranks.py              |   5 +-
 src/plots/tarp.py               |   1 -
 src/utils/defaults.py           |   4 +-
 tests/conftest.py               | 122 +++++++------
 tests/test_client.py            |  51 ++++--
 tests/test_evaluate.py          |  40 +++--
 tests/test_metrics.py           |  58 +++---
 tests/test_plots.py             |  64 ++++---
 16 files changed, 528 insertions(+), 354 deletions(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index c37b4bd..fda3255 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -32,6 +32,9 @@ jobs:
     - name: View poetry --help
       run: poetry --help
 
+    - name: Update lockfile
+      run: python -m poetry lock
+
     - name: Install dependencies
       shell: bash
       run: python -m poetry install
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8515ee8..4f0cba7 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,3 +1,7 @@
+import sys
+
+sys.path.append("../src")
+
 # Configuration file for the Sphinx documentation builder.
 #
 # For the full list of built-in configuration values, see the documentation:
@@ -6,17 +10,14 @@
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
-project = 'DeepDiagnostics'
-copyright = '2024, Becky Nevin, M Voetberg, Brian Nord'
-author = 'Becky Nevin, M Voetberg, Brian Nord'
-release = '0.1.0'
+project = "DeepDiagnostics"
+copyright = "2024, Becky Nevin, M Voetberg, Brian Nord"
+author = "Becky Nevin, M Voetberg, Brian Nord"
+release = "0.1.0"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
 
-import sys
-sys.path.append("../src")
-
 extensions = [
     "sphinx.ext.autodoc",
     "sphinx.ext.autosummary",
@@ -34,5 +35,5 @@
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
-html_theme = 'alabaster'
-html_static_path = ['_static']
+html_theme = "alabaster"
+html_static_path = ["_static"]
diff --git a/src/data/data.py b/src/data/data.py
index 129877d..4ab4bb1 100644
--- a/src/data/data.py
+++ b/src/data/data.py
@@ -41,7 +41,11 @@ def _load_simulator(self, name, simulator_kwargs):
 
         simulator = getattr(m, name)
 
-        simulator_kwargs = simulator_kwargs if simulator_kwargs is not None else get_item("data", "simulator_kwargs", raise_exception=False)
+        simulator_kwargs = (
+            simulator_kwargs
+            if simulator_kwargs is not None
+            else get_item("data", "simulator_kwargs", raise_exception=False)
+        )
         simulator_kwargs = {} if simulator_kwargs is None else simulator_kwargs
         simulator_instance = simulator(**simulator_kwargs)
 
@@ -102,7 +106,7 @@ def read_prior(self):
         raise NotImplementedError
 
     def load_prior(self, prior, prior_kwargs):
-        if prior is None: 
+        if prior is None:
             prior = get_item("data", "prior", raise_exception=False)
         try:
             prior = self.read_prior()
diff --git a/src/data/simulator.py b/src/data/simulator.py
index 983d8ea..7274086 100644
--- a/src/data/simulator.py
+++ b/src/data/simulator.py
@@ -1,4 +1,3 @@
-from typing import Any
 import numpy as np
 from abc import abstractmethod, ABC
 
diff --git a/src/metrics/local_two_sample.py b/src/metrics/local_two_sample.py
index 4ed1b4f..3d546ec 100644
--- a/src/metrics/local_two_sample.py
+++ b/src/metrics/local_two_sample.py
@@ -1,5 +1,5 @@
 from typing import Any, Optional, Union
-import numpy as np 
+import numpy as np
 
 from sklearn.model_selection import KFold
 from sklearn.neural_network import MLPClassifier
@@ -8,41 +8,58 @@
 from metrics.metric import Metric
 from utils.config import get_item
 
-class LocalTwoSampleTest(Metric): 
-    def __init__(self, model: Any, data: Any, out_dir: str | None = None, num_simulations: Optional[int] = None) -> None:
+
+class LocalTwoSampleTest(Metric):
+    def __init__(
+        self,
+        model: Any,
+        data: Any,
+        out_dir: str | None = None,
+        num_simulations: Optional[int] = None,
+    ) -> None:
         super().__init__(model, data, out_dir)
-        self.num_simulations = num_simulations if num_simulations is not None else get_item(
-            "metrics_common", "number_simulations", raise_exception=False
+        self.num_simulations = (
+            num_simulations
+            if num_simulations is not None
+            else get_item("metrics_common", "number_simulations", raise_exception=False)
         )
-    def _collect_data_params(self):
 
+    def _collect_data_params(self):
         # P is the prior and x_P is generated via the simulator from the parameters P.
         self.p = self.data.sample_prior(self.num_simulations)
         self.q = np.zeros_like(self.p)
 
-        self.outcome_given_p = np.zeros((self.num_simulations, self.data.simulator.generate_context().shape[-1]))
+        self.outcome_given_p = np.zeros(
+            (self.num_simulations, self.data.simulator.generate_context().shape[-1])
+        )
         self.outcome_given_q = np.zeros_like(self.outcome_given_p)
         self.evaluation_context = np.zeros_like(self.outcome_given_p)
 
-        for index, p in enumerate(self.p): 
+        for index, p in enumerate(self.p):
             context = self.data.simulator.generate_context()
             self.outcome_given_p[index] = self.data.simulator.simulate(p, context)
             # Q is the approximate posterior amortized in x
-            q =  self.model.sample_posterior(1, context).ravel()
+            q = self.model.sample_posterior(1, context).ravel()
             self.q[index] = q
             self.outcome_given_q[index] = self.data.simulator.simulate(q, context)
 
-        self.evaluation_context = np.array([self.data.simulator.generate_context() for _ in range(self.num_simulations)])
+        self.evaluation_context = np.array(
+            [
+                self.data.simulator.generate_context()
+                for _ in range(self.num_simulations)
+            ]
+        )
 
-    def train_linear_classifier(self, p, q, x_p, x_q, classifier:str, classifier_kwargs:dict={}): 
-        classifier_map = {
-            "MLP":MLPClassifier
-        }
-        try: 
+    def train_linear_classifier(
+        self, p, q, x_p, x_q, classifier: str, classifier_kwargs: dict = {}
+    ):
+        classifier_map = {"MLP": MLPClassifier}
+        try:
             classifier = classifier_map[classifier](**classifier_kwargs)
-        except KeyError: 
+        except KeyError:
             raise NotImplementedError(
-                f"{classifier} not implemented, choose from {list(classifier_map.keys())}.")
+                f"{classifier} not implemented, choose from {list(classifier_map.keys())}."
+            )
 
         joint_P_x = np.concatenate([p, x_p], axis=1)
         joint_Q_x = np.concatenate([q, x_q], axis=1)
@@ -50,7 +67,7 @@ def train_linear_classifier(self, p, q, x_p, x_q, classifier:str, classifier_kwa
         features = np.concatenate([joint_P_x, joint_Q_x], axis=0)
         labels = np.concatenate(
             [np.array([0] * len(joint_P_x)), np.array([1] * len(joint_Q_x))]
-        ).ravel() 
+        ).ravel()
 
         # shuffle features and labels
         features, labels = shuffle(features, labels)
@@ -59,44 +76,73 @@ def train_linear_classifier(self, p, q, x_p, x_q, classifier:str, classifier_kwa
         classifier.fit(X=features, y=labels)
         return classifier
 
-    def _eval_model(self, P, evaluation_sample, classifier): 
+    def _eval_model(self, P, evaluation_sample, classifier):
         evaluation = np.concatenate([P, evaluation_sample], axis=1)
         probability = classifier.predict_proba(evaluation)[:, 0]
-        return probability 
-
-    def _scores(self, p, q, x_p, x_q, classifier, cross_evaluate: bool=True, classifier_kwargs=None): 
+        return probability
+
+    def _scores(
+        self,
+        p,
+        q,
+        x_p,
+        x_q,
+        classifier,
+        cross_evaluate: bool = True,
+        classifier_kwargs=None,
+    ):
         model_probabilities = []
-        for model, model_args in zip(classifier, classifier_kwargs): 
-            if cross_evaluate: 
-                model_probabilities.append(self._cross_eval_score(p, q, x_p, x_q, model, model_args))
-            else: 
-                trained_model = self.train_linear_classifier(p, q, x_p, x_q, model, model_args)
-                model_probabilities.append(self._eval_model(P=p, classifier=trained_model))
+        for model, model_args in zip(classifier, classifier_kwargs):
+            if cross_evaluate:
+                model_probabilities.append(
+                    self._cross_eval_score(p, q, x_p, x_q, model, model_args)
+                )
+            else:
+                trained_model = self.train_linear_classifier(
+                    p, q, x_p, x_q, model, model_args
+                )
+                model_probabilities.append(  # NOTE(review): x_p assumed as eval sample
+                    self._eval_model(p, evaluation_sample=x_p, classifier=trained_model)
+                )
 
         return np.mean(model_probabilities, axis=0)
 
-    def _cross_eval_score(self, p, q, x_p, x_q, classifier, classifier_kwargs, n_cross_folds=5): 
-        kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42) # Getting the shape
+    def _cross_eval_score(
+        self, p, q, x_p, x_q, classifier, classifier_kwargs, n_cross_folds=5
+    ):
+        kf = KFold(
+            n_splits=n_cross_folds, shuffle=True, random_state=42
+        )  # Getting the shape
         cv_splits = kf.split(p)
         # train classifiers over cv-folds
         probabilities = []
-        self.evaluation_data = np.zeros((n_cross_folds, len(next(cv_splits)[1]), self.evaluation_context.shape[-1]))
+        self.evaluation_data = np.zeros(
+            (n_cross_folds, len(next(cv_splits)[1]), self.evaluation_context.shape[-1])
+        )
         self.prior_evaluation = np.zeros_like(p)
-        
-        kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42) 
+
+        kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42)
         cv_splits = kf.split(p)
         for cross_trial, (train_index, val_index) in enumerate(cv_splits):
             # get train split
-            p_train, x_p_train = p[train_index,:], x_p[train_index,:]
-            q_train, x_q_train = q[train_index,:], x_q[train_index,:]
-            trained_nth_classifier = self.train_linear_classifier(p_train, q_train, x_p_train, x_q_train, classifier, classifier_kwargs)
+            p_train, x_p_train = p[train_index, :], x_p[train_index, :]
+            q_train, x_q_train = q[train_index, :], x_q[train_index, :]
+            trained_nth_classifier = self.train_linear_classifier(
+                p_train, q_train, x_p_train, x_q_train, classifier, classifier_kwargs
+            )
             p_evaluate = p[val_index]
-            for index, p_validation in enumerate(p_evaluate): 
+            for index, p_validation in enumerate(p_evaluate):
                 self.evaluation_data[cross_trial][index] = self.data.simulator.simulate(
                     p_validation, self.evaluation_context[val_index][index]
                 )
             self.prior_evaluation[index] = p_validation
-            probabilities.append(self._eval_model(p_evaluate, self.evaluation_data[cross_trial], trained_nth_classifier))
+            probabilities.append(
+                self._eval_model(
+                    p_evaluate,
+                    self.evaluation_data[cross_trial],
+                    trained_nth_classifier,
+                )
+            )
         return probabilities
 
     def permute_data(self, P, Q):
@@ -110,38 +156,38 @@ def permute_data(self, P, Q):
         X = np.concatenate([P, Q], axis=0)
         X_perm = X[self.data.rng.permutation(np.arange(n_samples * 2))]
         return X_perm[:n_samples], X_perm[n_samples:]
-        
+
     def calculate(
-            self, 
-            linear_classifier:Union[str, list[str]]='MLP', 
-            cross_evaluate:bool=True, 
-            n_null_hypothesis_trials=100, 
-            classifier_kwargs:Union[dict, list[dict]]=None
-        ):
-
-        if isinstance(linear_classifier, str): 
+        self,
+        linear_classifier: Union[str, list[str]] = "MLP",
+        cross_evaluate: bool = True,
+        n_null_hypothesis_trials=100,
+        classifier_kwargs: Union[dict, list[dict]] = None,
+    ):
+        if isinstance(linear_classifier, str):
             linear_classifier = [linear_classifier]
 
-        if classifier_kwargs is None: 
+        if classifier_kwargs is None:
             classifier_kwargs = {}
-        if isinstance(classifier_kwargs, dict): 
+        if isinstance(classifier_kwargs, dict):
             classifier_kwargs = [classifier_kwargs]
 
         probabilities = self._scores(
-            self.p, 
-            self.q, 
-            self.outcome_given_p, 
-            self.outcome_given_q, 
-            classifier=linear_classifier, 
-            cross_evaluate=cross_evaluate, 
-            classifier_kwargs=classifier_kwargs
+            self.p,
+            self.q,
+            self.outcome_given_p,
+            self.outcome_given_q,
+            classifier=linear_classifier,
+            cross_evaluate=cross_evaluate,
+            classifier_kwargs=classifier_kwargs,
         )
         null_hypothesis_probabilities = []
-        for _ in range(n_null_hypothesis_trials): 
+        for _ in range(n_null_hypothesis_trials):
             joint_P_x = np.concatenate([self.p, self.outcome_given_p], axis=1)
             joint_Q_x = np.concatenate([self.q, self.outcome_given_q], axis=1)
             joint_P_x_perm, joint_Q_x_perm = self.permute_data(
-                joint_P_x, joint_Q_x,
+                joint_P_x,
+                joint_Q_x,
             )
             p_null = joint_P_x_perm[:, : self.p.shape[-1]]
             p_given_x_null = joint_P_x_perm[:, self.p.shape[-1] :]
@@ -149,29 +195,29 @@ def calculate(
             q_given_x_null = joint_Q_x_perm[:, self.q.shape[-1] :]
 
             null_result = self._scores(
-                p_null, 
-                q_null, 
-                p_given_x_null, 
-                q_given_x_null, 
-                classifier=linear_classifier, 
-                cross_evaluate=cross_evaluate, 
-                classifier_kwargs=classifier_kwargs
+                p_null,
+                q_null,
+                p_given_x_null,
+                q_given_x_null,
+                classifier=linear_classifier,
+                cross_evaluate=cross_evaluate,
+                classifier_kwargs=classifier_kwargs,
             )
 
             null_hypothesis_probabilities.append(null_result)
-        
-        null =  np.array(null_hypothesis_probabilities)
+
+        null = np.array(null_hypothesis_probabilities)
         self.output = {
-            "lc2st_probabilities": probabilities, 
-            "lc2st_null_hypothesis_probabilities": null
+            "lc2st_probabilities": probabilities,
+            "lc2st_null_hypothesis_probabilities": null,
         }
         return probabilities, null
-    
+
     def __call__(self, **kwds: Any) -> Any:
-        try: 
+        try:
             self._collect_data_params()
-        except NotImplementedError: 
-            pass 
+        except NotImplementedError:
+            pass
 
         self.calculate(**kwds)
-        self._finish()
\ No newline at end of file
+        self._finish()
diff --git a/src/plots/cdf_ranks.py b/src/plots/cdf_ranks.py
index 62b7a20..c0e7c77 100644
--- a/src/plots/cdf_ranks.py
+++ b/src/plots/cdf_ranks.py
@@ -45,7 +45,10 @@ def _data_setup(self):
         context = tensor(self.data.true_context())
 
         ranks, _ = run_sbc(
-            thetas, context, self.model.posterior, num_posterior_samples=self.num_samples
+            thetas,
+            context,
+            self.model.posterior,
+            num_posterior_samples=self.num_samples,
         )
         self.ranks = ranks
 
diff --git a/src/plots/coverage_fraction.py b/src/plots/coverage_fraction.py
index bbfe293..128868c 100644
--- a/src/plots/coverage_fraction.py
+++ b/src/plots/coverage_fraction.py
@@ -1,6 +1,5 @@
 import numpy as np
 import matplotlib.pyplot as plt
-from matplotlib import colormaps as cm
 
 from metrics.coverage_fraction import CoverageFraction as coverage_fraction_metric
 from plots.plot import Display
@@ -18,7 +17,7 @@ def __init__(
         parameter_labels=None,
         figure_size=None,
         line_styles=None,
-        parameter_colors=None
+        parameter_colors=None,
     ):
         super().__init__(model, data, save, show, out_dir)
 
diff --git a/src/plots/local_two_sample.py b/src/plots/local_two_sample.py
index 0735d39..3c63618 100644
--- a/src/plots/local_two_sample.py
+++ b/src/plots/local_two_sample.py
@@ -1,7 +1,7 @@
 from typing import Optional, Sequence, Union
 import matplotlib.pyplot as plt
 from matplotlib import cm
-import numpy as np 
+import numpy as np
 from matplotlib.colors import Normalize
 from matplotlib.patches import Rectangle
 
@@ -10,62 +10,94 @@
 from utils.config import get_item
 from utils.plotting_utils import get_hex_colors
 
-class LocalTwoSampleTest(Display): 
-
-    # https://github.com/JuliaLinhart/lc2st/blob/e221cc326480cb0daadfd2ba50df4eefd374793b/lc2st/graphical_diagnostics.py#L133 
-
-    def __init__(self, 
-                 model, 
-                 data, 
-                 save:bool, 
-                 show:bool, 
-                 out_dir:Optional[str]=None, 
-                 percentiles: Optional[Sequence] = None, 
-                 parameter_names: Optional[Sequence] = None, 
-                 parameter_colors: Optional[Sequence]= None, 
-                 figure_size: Optional[Sequence] = None,  
-                 num_simulations: Optional[int] = None, 
-                 colorway: Optional[str]=None): 
-        super().__init__(model, data, save, show, out_dir)
-        self.percentiles = percentiles if percentiles is not None else get_item("metrics_common", item='percentiles', raise_exception=False)
 
-        self.param_names = parameter_names if parameter_names is not None else get_item("plots_common", item="parameter_labels", raise_exception=False)
-        self.param_colors =  parameter_colors if parameter_colors is not None else get_item("plots_common", item="parameter_colors", raise_exception=False)
-        self.figure_size =  figure_size if figure_size is not None else get_item("plots_common", item="figure_size", raise_exception=False)
+class LocalTwoSampleTest(Display):
+    # https://github.com/JuliaLinhart/lc2st/blob/e221cc326480cb0daadfd2ba50df4eefd374793b/lc2st/graphical_diagnostics.py#L133
+
+    def __init__(
+        self,
+        model,
+        data,
+        save: bool,
+        show: bool,
+        out_dir: Optional[str] = None,
+        percentiles: Optional[Sequence] = None,
+        parameter_names: Optional[Sequence] = None,
+        parameter_colors: Optional[Sequence] = None,
+        figure_size: Optional[Sequence] = None,
+        num_simulations: Optional[int] = None,
+        colorway: Optional[str] = None,
+    ):
+        super().__init__(model, data, save, show, out_dir)
+        self.percentiles = (
+            percentiles
+            if percentiles is not None
+            else get_item("metrics_common", item="percentiles", raise_exception=False)
+        )
 
-        colorway = colorway if colorway is not None else get_item(
-                "plots_common", "default_colorway", raise_exception=False
+        self.param_names = (
+            parameter_names
+            if parameter_names is not None
+            else get_item(
+                "plots_common", item="parameter_labels", raise_exception=False
+            )
+        )
+        self.param_colors = (
+            parameter_colors
+            if parameter_colors is not None
+            else get_item(
+                "plots_common", item="parameter_colors", raise_exception=False
             )
-        self.region_colors = get_hex_colors(n_colors=len(self.percentiles), colorway=colorway)
+        )
+        self.figure_size = (
+            figure_size
+            if figure_size is not None
+            else get_item("plots_common", item="figure_size", raise_exception=False)
+        )
 
-        num_simulations = num_simulations if num_simulations is not None else get_item(
-            "metrics_common", "number_simulations", raise_exception=False
+        self.colorway = (  # stored on self: probability_intensity reads it
+            colorway
+            if colorway is not None
+            else get_item("plots_common", "default_colorway", raise_exception=False)
+        )
+        self.region_colors = get_hex_colors(
+            n_colors=len(self.percentiles), colorway=self.colorway
+        )
+
+        num_simulations = (
+            num_simulations
+            if num_simulations is not None
+            else get_item("metrics_common", "number_simulations", raise_exception=False)
         )
         self.l2st = l2st(model, data, out_dir, num_simulations)
 
-    def _plot_name(self): 
+    def _plot_name(self):
         return "local_C2ST.png"
 
-    def _make_pairplot_values(self, random_samples): 
-        pp_vals = np.array([np.mean(random_samples <= alpha) for alpha in self.cdf_alphas])
+    def _make_pairplot_values(self, random_samples):
+        pp_vals = np.array(
+            [np.mean(random_samples <= alpha) for alpha in self.cdf_alphas]
+        )
         return pp_vals
 
-    def lc2st_pairplot(self, subplot, confidence_region_alpha=0.2): 
-
+    def lc2st_pairplot(self, subplot, confidence_region_alpha=0.2):
         null_cdf = self._make_pairplot_values([0.5] * len(self.probability))
         subplot.plot(
             self.cdf_alphas, null_cdf, "--", color="black", label="Theoretical Null CDF"
-        )   
+        )
 
         null_hypothesis_pairplot = np.zeros((len(self.cdf_alphas), *null_cdf.shape))
 
         for t in range(len(self.null_hypothesis_probability)):
-            null_hypothesis_pairplot[t] = self._make_pairplot_values(self.null_hypothesis_probability[t])
-
+            null_hypothesis_pairplot[t] = self._make_pairplot_values(
+                self.null_hypothesis_probability[t]
+            )
 
-        for percentile, color in zip(self.percentiles, self.region_colors): 
-            low_null = np.quantile(null_hypothesis_pairplot, percentile/100, axis=1)
-            up_null = np.quantile(null_hypothesis_pairplot, (100-percentile)/100, axis=1)
+        for percentile, color in zip(self.percentiles, self.region_colors):
+            low_null = np.quantile(null_hypothesis_pairplot, percentile / 100, axis=1)
+            up_null = np.quantile(
+                null_hypothesis_pairplot, (100 - percentile) / 100, axis=1
+            )
 
             subplot.fill_between(
                 self.cdf_alphas,
@@ -76,52 +108,68 @@ def lc2st_pairplot(self, subplot, confidence_region_alpha=0.2):
                 label=f"{percentile}% Conf. region",
             )
 
-        for prob, label, color in zip(self.probability, self.param_names, self.param_colors):
+        for prob, label, color in zip(
+            self.probability, self.param_names, self.param_colors
+        ):
             pairplot_values = self._make_pairplot_values(prob)
             subplot.plot(self.cdf_alphas, pairplot_values, label=label, color=color)
 
-    def probability_intensity(self, subplot, features, n_bins=20): 
+    def probability_intensity(self, subplot, features, n_bins=20):
         evaluation_data = self.l2st.evaluation_data
         norm = Normalize(vmin=0, vmax=1)
-        if len(evaluation_data.shape) >=3: # Used the kfold option 
-            evaluation_data = evaluation_data.reshape((
-                evaluation_data.shape[0]*evaluation_data.shape[1], 
-                evaluation_data.shape[-1]))
+        if len(evaluation_data.shape) >= 3:  # Used the kfold option
+            evaluation_data = evaluation_data.reshape(
+                (
+                    evaluation_data.shape[0] * evaluation_data.shape[1],
+                    evaluation_data.shape[-1],
+                )
+            )
             self.probability = self.probability.ravel()
 
-        try: 
+        try:
             # If there is only one feature
             int(features)
 
             _, bins, patches = subplot.hist(
-                evaluation_data[:,features], n_bins, weights=self.probability, density=True, color=self.param_colors[features])
+                evaluation_data[:, features],
+                n_bins,
+                weights=self.probability,
+                density=True,
+                color=self.param_colors[features],
+            )
 
             eval_bins = np.select(
-                [evaluation_data[:,features] <= i for i in bins[1:]], list(range(n_bins))
+                [evaluation_data[:, features] <= i for i in bins[1:]],
+                list(range(n_bins)),
             )
 
             # get mean predicted proba for each bin
-            weights = np.array([self.probability[eval_bins==i].mean() for i in np.unique(eval_bins)]) #df_probas.groupby(["bins"]).mean().probas
+            weights = np.array(
+                [self.probability[eval_bins == i].mean() for i in np.unique(eval_bins)]
+            )  # df_probas.groupby(["bins"]).mean().probas
             colors = plt.get_cmap(self.colorway)
 
             for w, p in zip(weights, patches):
                 p.set_facecolor(colors(norm(w)))  # color is mean predicted proba
 
-
-        except TypeError: 
+        except TypeError:
             _, x_edges, y_edges, image = subplot.hist2d(
-                evaluation_data[:,features[0]], 
-                evaluation_data[:,features[1]], 
-                n_bins, 
-                density=True,  color="white")
-            
-            image.remove() 
+                evaluation_data[:, features[0]],
+                evaluation_data[:, features[1]],
+                n_bins,
+                density=True,
+                color="white",
+            )
+
+            image.remove()
 
             eval_bins_dim_1 = np.select(
-                [evaluation_data[:,features[0]] <= i for i in x_edges[1:]], list(range(n_bins))
+                [evaluation_data[:, features[0]] <= i for i in x_edges[1:]],
+                list(range(n_bins)),
             )
             eval_bins_dim_2 = np.select(
-                [evaluation_data[:,features[1]] <= i for i in y_edges[1:]], list(range(n_bins))
+                [evaluation_data[:, features[1]] <= i for i in y_edges[1:]],
+                list(range(n_bins)),
             )
 
             colors = plt.get_cmap(self.colorway)
@@ -129,17 +177,20 @@ def probability_intensity(self, subplot, features, n_bins=20):
             weights = np.empty((n_bins, n_bins)) * np.nan
             for i in range(n_bins):
                 for j in range(n_bins):
-                    local_and = np.logical_and(eval_bins_dim_1==i, eval_bins_dim_2==j)
-                    if local_and.any(): 
-                        weights[i, j] = self.probability[np.logical_and(eval_bins_dim_1==i, eval_bins_dim_2==j)].mean() 
-                        
+                    local_and = np.logical_and(
+                        eval_bins_dim_1 == i, eval_bins_dim_2 == j
+                    )
+                    if local_and.any():
+                        weights[i, j] = self.probability[
+                            np.logical_and(eval_bins_dim_1 == i, eval_bins_dim_2 == j)
+                        ].mean()
 
             for i in range(len(x_edges) - 1):
                 for j in range(len(y_edges) - 1):
-                    weight = weights[i,j]
+                    weight = weights[i, j]
                     facecolor = colors(norm(weight))
                     # if no sample in bin, set color to white
-                    if weight == np.nan: 
+                    if np.isnan(weight):  # `== np.nan` is always False
                         facecolor = "white"
 
                     rect = Rectangle(
@@ -151,36 +202,41 @@ def probability_intensity(self, subplot, features, n_bins=20):
                     )
                     subplot.add_patch(rect)
 
-            
-    def _plot(self, 
-            use_intensity_plot:bool=True, 
-            n_alpha_samples:int=100, 
-            confidence_region_alpha:float=0.2,
-            n_intensity_bins:int=20, 
-            linear_classifier:Union[str, list[str]]='MLP', 
-            cross_evaluate:bool=True, 
-            n_null_hypothesis_trials=100, 
-            classifier_kwargs:Union[dict, list[dict]]=None, 
-            pairplot_y_label="Empirical CDF",
-            pairplot_x_label="", 
-            pairplot_title="Local Classifier PP-Plot", 
-            intensity_plot_ylabel="", 
-            intensity_plot_xlabel="", 
-            intensity_plot_title="Local Classifier Intensity Distribution",
-        ):
-
-        # Plots to make - 
+    def _plot(
+        self,
+        use_intensity_plot: bool = True,
+        n_alpha_samples: int = 100,
+        confidence_region_alpha: float = 0.2,
+        n_intensity_bins: int = 20,
+        linear_classifier: Union[str, list[str]] = "MLP",
+        cross_evaluate: bool = True,
+        n_null_hypothesis_trials=100,
+        classifier_kwargs: Union[dict, list[dict]] = None,
+        pairplot_y_label="Empirical CDF",
+        pairplot_x_label="",
+        pairplot_title="Local Classifier PP-Plot",
+        intensity_plot_ylabel="",
+        intensity_plot_xlabel="",
+        intensity_plot_title="Local Classifier Intensity Distribution",
+    ):
+        # Plots to make -
         # pp_plot_lc2st: https://github.com/JuliaLinhart/lc2st/blob/e221cc326480cb0daadfd2ba50df4eefd374793b/lc2st/graphical_diagnostics.py#L49
-        # eval_space_with_proba_intensity: https://github.com/JuliaLinhart/lc2st/blob/e221cc326480cb0daadfd2ba50df4eefd374793b/lc2st/graphical_diagnostics.py#L133 
-  
-        self.l2st(**{
-            "linear_classifier":linear_classifier, 
-            "cross_evaluate": cross_evaluate, 
-            "n_null_hypothesis_trials": n_null_hypothesis_trials, 
-            "classifier_kwargs": classifier_kwargs})
-        
-        self.probability, self.null_hypothesis_probability = self.l2st.output["lc2st_probabilities"], self.l2st.output["lc2st_null_hypothesis_probabilities"]
-        
+        # eval_space_with_proba_intensity: https://github.com/JuliaLinhart/lc2st/blob/e221cc326480cb0daadfd2ba50df4eefd374793b/lc2st/graphical_diagnostics.py#L133
+
+        self.l2st(
+            **{
+                "linear_classifier": linear_classifier,
+                "cross_evaluate": cross_evaluate,
+                "n_null_hypothesis_trials": n_null_hypothesis_trials,
+                "classifier_kwargs": classifier_kwargs,
+            }
+        )
+
+        self.probability, self.null_hypothesis_probability = (
+            self.l2st.output["lc2st_probabilities"],
+            self.l2st.output["lc2st_null_hypothesis_probabilities"],
+        )
+
         fig, subplots = plt.subplots(1, 1, figsize=self.figure_size)
         self.cdf_alphas = np.linspace(0, 1, n_alpha_samples)
 
@@ -194,63 +250,65 @@ def _plot(self,
         self.plot_name = "local_c2st_pp_plot.png"
         self._finish()
 
-        if use_intensity_plot: 
-
-            fig, subplots = plt.subplots(len(self.param_names), len(self.param_names), figsize=(self.figure_size[0]*1.2, self.figure_size[1]))
+        if use_intensity_plot:
+            fig, subplots = plt.subplots(
+                len(self.param_names),
+                len(self.param_names),
+                figsize=(self.figure_size[0] * 1.2, self.figure_size[1]),
+            )
             combos_run = []
-            for x_index, x_param in enumerate(self.param_names): 
-                for y_index, y_param in enumerate(self.param_names): 
-                    
-                    if ({x_index, y_index} not in combos_run) and (x_index>=y_index): 
+            for x_index, x_param in enumerate(self.param_names):
+                for y_index, y_param in enumerate(self.param_names):
+                    if ({x_index, y_index} not in combos_run) and (x_index >= y_index):
                         subplot = subplots[x_index][y_index]
 
-                        if x_index == y_index: 
+                        if x_index == y_index:
                             features = x_index
-                        else: 
+                        else:
                             features = [x_index, y_index]
 
                         self.probability_intensity(
-                            subplot, 
-                            features=features,
-                            n_bins=n_intensity_bins
+                            subplot, features=features, n_bins=n_intensity_bins
                         )
                         combos_run.append({x_index, y_index})
 
-                    if (x_index<y_index): 
+                    if x_index < y_index:
                         subplots[x_index][y_index].axes.get_xaxis().set_visible(False)
                         subplots[x_index][y_index].axes.get_yaxis().set_visible(False)
-                    
-                    if x_index == len(self.param_names)-1: 
+
+                    if x_index == len(self.param_names) - 1:
                         subplots[x_index][y_index].set_xlabel(x_param)
 
-                    if y_index == 0: 
+                    if y_index == 0:
                         subplots[x_index][y_index].set_ylabel(y_param)
 
-        for index, y_label in enumerate(self.param_names): 
+        for index, y_label in enumerate(self.param_names):
             subplots[index][0].set_ylabel(y_label)
 
-        for index, x_label in enumerate(self.param_names): 
-            subplots[len(self.param_names)-1][-1*index].set_xlabel(x_label)
-
+        for index, x_label in enumerate(self.param_names):
+            subplots[len(self.param_names) - 1][-1 * index].set_xlabel(x_label)
 
         fig.supylabel(intensity_plot_ylabel)
         fig.supxlabel(intensity_plot_xlabel)
         fig.suptitle(intensity_plot_title)
         norm = Normalize(vmin=0, vmax=1)
 
-        fig.colorbar(cm.ScalarMappable(norm=norm, cmap=self.colorway), ax=subplots.ravel().tolist())
+        fig.colorbar(
+            cm.ScalarMappable(norm=norm, cmap=self.colorway),
+            ax=subplots.ravel().tolist(),
+        )
 
         self.plot_name = "local_c2st_corner_plot.png"
         self._finish()
 
     def __call__(self, **plot_args) -> None:
-        try: 
+        try:
             self._data_setup()
-        except NotImplementedError: 
-            pass 
-        try: 
-            self._plot_settings() 
-        except NotImplementedError: 
-            pass 
-        
-        self._plot(**plot_args)
\ No newline at end of file
+        except NotImplementedError:
+            pass
+        try:
+            self._plot_settings()
+        except NotImplementedError:
+            pass
+
+        self._plot(**plot_args)
diff --git a/src/plots/ranks.py b/src/plots/ranks.py
index 050dbca..2e05a1f 100644
--- a/src/plots/ranks.py
+++ b/src/plots/ranks.py
@@ -20,7 +20,10 @@ def _data_setup(self):
         )
 
         ranks, _ = run_sbc(
-            thetas, context, self.model.posterior, num_posterior_samples=self.num_samples
+            thetas,
+            context,
+            self.model.posterior,
+            num_posterior_samples=self.num_samples,
         )
         self.ranks = ranks
 
diff --git a/src/plots/tarp.py b/src/plots/tarp.py
index e11c54d..798ae54 100644
--- a/src/plots/tarp.py
+++ b/src/plots/tarp.py
@@ -1,5 +1,4 @@
 from typing import Optional, Union
-from torch import tensor
 import numpy as np
 import tarp
 
diff --git a/src/utils/defaults.py b/src/utils/defaults.py
index 3e5a1ed..3886de7 100644
--- a/src/utils/defaults.py
+++ b/src/utils/defaults.py
@@ -7,8 +7,8 @@
     },
     "model": {"model_engine": "SBIModel"},
     "data": {
-        "data_engine": "H5Data", 
-        "prior":"normal", 
+        "data_engine": "H5Data",
+        "prior": "normal",
         "prior_kwargs": None,
         "simulator_kwargs": None,
     },
diff --git a/tests/conftest.py b/tests/conftest.py
index 094fbb6..20282bc 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
-import pytest 
-import yaml 
-import numpy as np 
+import pytest
+import yaml
+import numpy as np
 
 from data import H5Data
 from data.simulator import Simulator
@@ -8,14 +8,16 @@
 from utils.register import register_simulator
 
 
-class MockSimulator(Simulator): 
+class MockSimulator(Simulator):
     def generate_context(self, n_samples: int) -> np.ndarray:
         return np.linspace(0, 100, n_samples)
-    
+
     def simulate(self, theta: np.ndarray, context_samples: np.ndarray) -> np.ndarray:
         thetas = np.atleast_2d(theta)
         if thetas.shape[1] != 2:
-            raise ValueError("Input tensor must have shape (n, 2) where n is the number of parameter sets.")
+            raise ValueError(
+                "Input tensor must have shape (n, 2) where n is the number of parameter sets."
+            )
 
         if thetas.shape[0] == 1:
             # If there's only one set of parameters, extract them directly
@@ -25,8 +27,10 @@ def simulate(self, theta: np.ndarray, context_samples: np.ndarray) -> np.ndarray
             m, b = thetas[:, 0], thetas[:, 1]
         rs = np.random.RandomState()
         sigma = 1
-        epsilon = rs.normal(loc=0, scale=sigma, size=(len(context_samples), thetas.shape[0]))
-        
+        epsilon = rs.normal(
+            loc=0, scale=sigma, size=(len(context_samples), thetas.shape[0])
+        )
+
         # Initialize an empty array to store the results for each set of parameters
         y = np.zeros((len(context_samples), thetas.shape[0]))
         for i in range(thetas.shape[0]):
@@ -34,81 +38,95 @@ def simulate(self, theta: np.ndarray, context_samples: np.ndarray) -> np.ndarray
             y[:, i] = m * context_samples + b + epsilon[:, i]
         return y.T
 
+
 @pytest.fixture
-def model_path(): 
+def model_path():
     return "resources/savedmodels/sbi/sbi_linear_from_data.pkl"
 
+
 @pytest.fixture
-def data_path(): 
+def data_path():
     return "resources/saveddata/data_validation.h5"
 
-@pytest.fixture 
+
+@pytest.fixture
 def simulator_name():
     name = MockSimulator.__name__
     register_simulator(name, MockSimulator)
     return name
 
-@pytest.fixture 
-def mock_model(model_path): 
+
+@pytest.fixture
+def mock_model(model_path):
     return SBIModel(model_path)
 
+
 @pytest.fixture
-def mock_data(data_path, simulator_name): 
+def mock_data(data_path, simulator_name):
     return H5Data(data_path, simulator_name)
 
+
 @pytest.fixture
-def config_factory(): 
+def config_factory():
     def factory(
-        out_dir=None, 
-        model_path=None, 
-        model_engine=None, 
-        data_path=None, 
-        data_engine=None, 
-        simulator=None, 
-        plot_settings=None, 
-        metrics_settings=None, 
-        plots=None, 
-        metrics=None
-):
-        config = { "common": {}, "model": {}, "data":{}, "plots_common": {}, "plots":{}, "metrics_common": {},"metrics":{}}
-        
-        # Single settings 
-        if out_dir is not None: 
-            config["common"]['out_dir'] = out_dir
-        if model_path is not None: 
-            config['model']['model_path'] = model_path
-        if model_engine is not None: 
-            config['model']['model_engine'] = model_engine
-        if data_path is not None: 
-            config['data']['data_path'] = data_path
-        if data_engine is not None: 
-            config['data']['data_engine'] = data_engine
-        if simulator is not None: 
-            config['data']['simulator'] = simulator
+        out_dir=None,
+        model_path=None,
+        model_engine=None,
+        data_path=None,
+        data_engine=None,
+        simulator=None,
+        plot_settings=None,
+        metrics_settings=None,
+        plots=None,
+        metrics=None,
+    ):
+        config = {
+            "common": {},
+            "model": {},
+            "data": {},
+            "plots_common": {},
+            "plots": {},
+            "metrics_common": {},
+            "metrics": {},
+        }
+
+        # Single settings
+        if out_dir is not None:
+            config["common"]["out_dir"] = out_dir
+        if model_path is not None:
+            config["model"]["model_path"] = model_path
+        if model_engine is not None:
+            config["model"]["model_engine"] = model_engine
+        if data_path is not None:
+            config["data"]["data_path"] = data_path
+        if data_engine is not None:
+            config["data"]["data_engine"] = data_engine
+        if simulator is not None:
+            config["data"]["simulator"] = simulator
 
         # Dict settings
-        if plot_settings is not None: 
-            for key, item in plot_settings.items(): 
-                config['plots_common'][key] = item
-        if metrics_settings is not None: 
+        if plot_settings is not None:
+            for key, item in plot_settings.items():
+                config["plots_common"][key] = item
+        if metrics_settings is not None:
             for key, item in metrics_settings.items():
-                config['metrics_common'][key] = item
+                config["metrics_common"][key] = item
 
-        if metrics is not None: 
+        if metrics is not None:
             if isinstance(metrics, dict):
                 config["metrics"] = metrics
-            if isinstance(metrics, list): 
+            if isinstance(metrics, list):
                 config["metrics"] = {metric: {} for metric in metrics}
 
-        if plots is not None: 
+        if plots is not None:
             if isinstance(plots, dict):
                 config["plots"] = plots
-            if isinstance(metrics, list): 
+            if isinstance(plots, list):
                 config["plots"] = {plot: {} for plot in plots}
 
         temp_outpath = "./temp_config.yml"
         yaml.dump(config, open(temp_outpath, "w"))
 
         return temp_outpath
-    
-    return factory
\ No newline at end of file
+
+    return factory
diff --git a/tests/test_client.py b/tests/test_client.py
index c60706e..cfeba2a 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -1,45 +1,64 @@
 import subprocess
-import os 
+import os
 
-def test_parser_args(model_path, data_path, simulator_name): 
-    command = ["diagnose", 
-               "--model_path", model_path,
-               "--data_path", data_path,
-               "--simulator", simulator_name]
+
+def test_parser_args(model_path, data_path, simulator_name):
+    command = [
+        "diagnose",
+        "--model_path",
+        model_path,
+        "--data_path",
+        data_path,
+        "--simulator",
+        simulator_name,
+    ]
     process = subprocess.run(command)
     exit_code = process.returncode
-    assert exit_code == 0 
+    assert exit_code == 0
     print(process.stdout)
 
-def test_parser_config(config_factory, model_path, data_path, simulator_name): 
-    config_path = config_factory(model_path=model_path, data_path=data_path, simulator=simulator_name)
+
+def test_parser_config(config_factory, model_path, data_path, simulator_name):
+    config_path = config_factory(
+        model_path=model_path, data_path=data_path, simulator=simulator_name
+    )
     command = ["diagnose", "--config", config_path]
     process = subprocess.run(command)
     exit_code = process.returncode
-    assert exit_code == 0 
+    assert exit_code == 0
     print(process.stdout)
 
-def test_main_no_methods(config_factory, model_path, data_path, simulator_name): 
+
+def test_main_no_methods(config_factory, model_path, data_path, simulator_name):
     out_dir = "./test_out_dir/"
-    config_path = config_factory(model_path=model_path, data_path=data_path, simulator=simulator_name, plots=[], metrics=[], out_dir=out_dir)
+    config_path = config_factory(
+        model_path=model_path,
+        data_path=data_path,
+        simulator=simulator_name,
+        plots=[],
+        metrics=[],
+        out_dir=out_dir,
+    )
     command = ["diagnose", "--config", config_path]
     process = subprocess.run(command)
     exit_code = process.returncode
-    assert exit_code == 0 
+    assert exit_code == 0
     print(process.stdout)
 
     # There should be nothing at the outpath
     assert os.listdir(out_dir) == []
 
-def test_main_missing_config(): 
+
+def test_main_missing_config():
     config_path = "there_is_no_config_at_this_path.yml"
     command = ["diagnose", "--config", config_path]
     process = subprocess.run(command)
     exit_code = process.returncode
     assert exit_code == 1
     print(process.stdout)
-    
-def test_main_missing_args(model_path): 
+
+
+def test_main_missing_args(model_path):
     command = ["diagnose", "--model_path", model_path]
     process = subprocess.run(command)
     exit_code = process.returncode
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index 3ed6116..f2fe001 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -6,11 +6,11 @@
 import os
 
 # flake8: noqa
-#sys.path.append("..")
+# sys.path.append("..")
 print(sys.path)
 from scripts.evaluate import Diagnose_static, Diagnose_generative
 from scripts.io import ModelLoader
-#from src.scripts import evaluate
+# from src.scripts import evaluate
 
 
 """
@@ -26,6 +26,7 @@
 def diagnose_static_instance():
     return Diagnose_static()
 
+
 @pytest.fixture
 def diagnose_generative_instance():
     return Diagnose_generative()
@@ -34,22 +35,25 @@ def diagnose_generative_instance():
 @pytest.fixture
 def posterior_generative_sbi_model():
     # create a temporary directory for the saved model
-    #dir = "savedmodels/sbi/"
-    #os.makedirs(dir)
+    # dir = "savedmodels/sbi/"
+    # os.makedirs(dir)
 
     # now save the model
     low_bounds = torch.tensor([0, -10])
     high_bounds = torch.tensor([10, 10])
 
-    prior = sbi.utils.BoxUniform(low = low_bounds, high = high_bounds)
+    prior = sbi.utils.BoxUniform(low=low_bounds, high=high_bounds)
 
-    posterior = sbi.inference.base.infer(simulator, prior, "SNPE", num_simulations=10000)
+    posterior = sbi.inference.base.infer(
+        simulator, prior, "SNPE", num_simulations=10000
+    )
 
     # Provide the posterior to the tests
     yield prior, posterior
 
     # Teardown: Remove the temporary directory and its contents
-    #shutil.rmtree(dataset_dir)
+    # shutil.rmtree(dataset_dir)
+
 
 @pytest.fixture
 def setup_plot_dir():
@@ -58,6 +62,7 @@ def setup_plot_dir():
     os.makedirs(dir)
     yield dir
 
+
 def simulator(thetas):  # , percent_errors):
     # convert to numpy array (if tensor):
     thetas = np.atleast_2d(thetas)
@@ -92,15 +97,16 @@ def simulator(thetas):  # , percent_errors):
     return torch.Tensor(y.T)
 
 
-def test_generate_sbc_samples(diagnose_generative_instance,
-                              posterior_generative_sbi_model):
+def test_generate_sbc_samples(
+    diagnose_generative_instance, posterior_generative_sbi_model
+):
     # Mock data
-    #low_bounds = torch.tensor([0, -10])
-    #high_bounds = torch.tensor([10, 10])
+    # low_bounds = torch.tensor([0, -10])
+    # high_bounds = torch.tensor([10, 10])
 
-    #prior = sbi.utils.BoxUniform(low=low_bounds, high=high_bounds)
+    # prior = sbi.utils.BoxUniform(low=low_bounds, high=high_bounds)
     prior, posterior = posterior_generative_sbi_model
-    #inference_instance  # provide a mock posterior object
+    # inference_instance  # provide a mock posterior object
     simulator_test = simulator  # provide a mock simulator function
     num_sbc_runs = 1000
     num_posterior_samples = 1000
@@ -113,12 +119,12 @@ def test_generate_sbc_samples(diagnose_generative_instance,
     # Add assertions based on the expected behavior of the method
 
 
-def test_run_all_sbc(diagnose_generative_instance,
-                     posterior_generative_sbi_model,
-                     setup_plot_dir):
+def test_run_all_sbc(
+    diagnose_generative_instance, posterior_generative_sbi_model, setup_plot_dir
+):
     labels_list = ["$m$", "$b$"]
     colorlist = ["#9C92A3", "#0F5257"]
-    
+
     prior, posterior = posterior_generative_sbi_model
     simulator_test = simulator  # provide a mock simulator function
 
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 1cec089..371d5f2 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -1,54 +1,62 @@
-import os 
-import pytest 
+import os
+import pytest
 
-from utils.defaults import Defaults 
+from utils.defaults import Defaults
 from utils.config import Config
-from metrics import (
-    Metrics, 
-    CoverageFraction, 
-    AllSBC
-)
+from metrics import Metrics, CoverageFraction, AllSBC
+
 
 @pytest.fixture
-def metric_config(config_factory): 
-    metrics_settings={"use_progress_bar":False, "samples_per_inference":10, "percentiles":[95]}
+def metric_config(config_factory):
+    metrics_settings = {
+        "use_progress_bar": False,
+        "samples_per_inference": 10,
+        "percentiles": [95],
+    }
     config = config_factory(metrics_settings=metrics_settings)
     Config(config)
     return config
 
-def test_all_metrics_catalogued(): 
-    '''Each metrics gets its own file, and each metric is included in the Metrics dictionary 
-    so the client can use it. 
-    This test verifies all metrics are cataloged'''
+
+def test_all_metrics_catalogued():
+    """Each metrics gets its own file, and each metric is included in the Metrics dictionary
+    so the client can use it.
+    This test verifies all metrics are cataloged"""
 
     all_files = os.listdir("src/metrics/")
-    files_ignore = ['metric.py', '__init__.py', '__pycache__'] # All files not containing a metric 
+    files_ignore = [
+        "metric.py",
+        "__init__.py",
+        "__pycache__",
+    ]  # All files not containing a metric
     num_files = len([file for file in all_files if file not in files_ignore])
     assert len(Metrics) == num_files
 
-def test_all_defaults(metric_config, mock_model, mock_data): 
+
+def test_all_defaults(metric_config, mock_model, mock_data):
     """
     Ensures each metric has a default set of parameters and is included in the defaults list
-    Ensures each test can initialize, regardless of the veracity of the output 
+    Ensures each test can initialize, regardless of the veracity of the output
     """
     Config(metric_config)
 
-    for metric_name, metric_obj in Metrics.items(): 
-        assert metric_name in Defaults['metrics']
+    for metric_name, metric_obj in Metrics.items():
+        assert metric_name in Defaults["metrics"]
         metric_obj(mock_model, mock_data)
 
 
-def test_coverage_fraction(metric_config, mock_model, mock_data): 
+def test_coverage_fraction(metric_config, mock_model, mock_data):
     Config(metric_config)
     coverage_fraction = CoverageFraction(mock_model, mock_data)
     _, coverage = coverage_fraction.calculate()
     assert coverage_fraction.output.all() is not None
 
-    # TODO Shape of coverage 
-    assert coverage.shape 
-    
-def test_all_sbc(metric_config, mock_model, mock_data): 
+    # TODO Shape of coverage
+    assert coverage.shape
+
+
+def test_all_sbc(metric_config, mock_model, mock_data):
     Config(metric_config)
     all_sbc = AllSBC(mock_model, mock_data)
     all_sbc()
-    # TODO What is this supposed to be
\ No newline at end of file
+    # TODO What is this supposed to be
diff --git a/tests/test_plots.py b/tests/test_plots.py
index 253343b..50e0d1f 100644
--- a/tests/test_plots.py
+++ b/tests/test_plots.py
@@ -1,58 +1,66 @@
-import os 
-import pytest 
+import os
+import pytest
 
-from utils.defaults import Defaults 
+from utils.defaults import Defaults
 from utils.config import Config, get_item
-from plots import (
-    Plots, 
-    CDFRanks, 
-    Ranks, 
-    CoverageFraction, 
-    TARP
-)
+from plots import Plots, CDFRanks, Ranks, CoverageFraction, TARP
+
 
 @pytest.fixture
-def plot_config(config_factory): 
+def plot_config(config_factory):
     out_dir = "./temp_results/"
-    metrics_settings={"use_progress_bar":False, "samples_per_inference":10, "percentiles":[95]}
+    metrics_settings = {
+        "use_progress_bar": False,
+        "samples_per_inference": 10,
+        "percentiles": [95],
+    }
     config = config_factory(out_dir=out_dir, metrics_settings=metrics_settings)
     Config(config)
 
 
-def test_all_plot_catalogued(): 
-    '''Each metrics gets its own file, and each metric is included in the Metrics dictionary 
-    so the client can use it. 
-    This test verifies all metrics are cataloged'''
+def test_all_plot_catalogued():
+    """Each metrics gets its own file, and each metric is included in the Metrics dictionary
+    so the client can use it.
+    This test verifies all metrics are cataloged"""
 
     all_files = os.listdir("src/plots/")
-    files_ignore = ['plot.py', '__init__.py', '__pycache__'] # All files not containing a metric 
+    files_ignore = [
+        "plot.py",
+        "__init__.py",
+        "__pycache__",
+    ]  # All files not containing a metric
     num_files = len([file for file in all_files if file not in files_ignore])
     assert len(Plots) == num_files
 
-def test_all_defaults(plot_config, mock_model, mock_data): 
+
+def test_all_defaults(plot_config, mock_model, mock_data):
     """
     Ensures each metric has a default set of parameters and is included in the defaults list
-    Ensures each test can initialize, regardless of the veracity of the output 
+    Ensures each test can initialize, regardless of the veracity of the output
     """
-    for plot_name, plot_obj in Plots.items(): 
-        assert plot_name in Defaults['plots']
-        plot_obj(mock_model, mock_data,  save=True, show=False)
+    for plot_name, plot_obj in Plots.items():
+        assert plot_name in Defaults["plots"]
+        plot_obj(mock_model, mock_data, save=True, show=False)
+
 
-def test_plot_cdf(plot_config, mock_model, mock_data): 
+def test_plot_cdf(plot_config, mock_model, mock_data):
     plot = CDFRanks(mock_model, mock_data, save=True, show=False)
-    plot(**get_item("plots", "CDFRanks", raise_exception=False)) 
+    plot(**get_item("plots", "CDFRanks", raise_exception=False))
     assert os.path.exists(f"{plot.out_path}/{plot.plot_name}")
 
-def test_plot_ranks(plot_config, mock_model, mock_data): 
+
+def test_plot_ranks(plot_config, mock_model, mock_data):
     plot = Ranks(mock_model, mock_data, save=True, show=False)
     plot(**get_item("plots", "Ranks", raise_exception=False))
     assert os.path.exists(f"{plot.out_path}/{plot.plot_name}")
 
-def test_plot_coverage(plot_config, mock_model, mock_data): 
+
+def test_plot_coverage(plot_config, mock_model, mock_data):
     plot = CoverageFraction(mock_model, mock_data, save=True, show=False)
     plot(**get_item("plots", "CoverageFraction", raise_exception=False))
     assert os.path.exists(f"{plot.out_path}/{plot.plot_name}")
 
-def test_plot_tarp(plot_config, mock_model, mock_data): 
+
+def test_plot_tarp(plot_config, mock_model, mock_data):
     plot = TARP(mock_model, mock_data, save=True, show=False)
-    plot(**get_item("plots", "TARP", raise_exception=False))
\ No newline at end of file
+    plot(**get_item("plots", "TARP", raise_exception=False))

From bc1442d6ecf7c4d2e7098903a38d876f8025ecdc Mon Sep 17 00:00:00 2001
From: voetberg <magpie127@gmail.com>
Date: Thu, 6 Jun 2024 16:50:49 -0500
Subject: [PATCH 2/5] pinned scipy version

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index eea7716..095de07 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ numpy = "^1.26.4"
 matplotlib = "^3.8.3"
 tarp = "^0.1.1"
 deprecation = "^2.1.0"
+scipy = "1.12.0"
 
 
 [tool.poetry.group.dev.dependencies]

From bff6f0330d5540900989f7a654e130a82710f507 Mon Sep 17 00:00:00 2001
From: voetberg <magpie127@gmail.com>
Date: Thu, 6 Jun 2024 16:51:19 -0500
Subject: [PATCH 3/5] removed deprecated test_evaluate

---
 tests/test_evaluate.py | 191 -----------------------------------------
 1 file changed, 191 deletions(-)
 delete mode 100644 tests/test_evaluate.py

diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
deleted file mode 100644
index f2fe001..0000000
--- a/tests/test_evaluate.py
+++ /dev/null
@@ -1,191 +0,0 @@
-import sys
-import pytest
-import torch
-import numpy as np
-import sbi
-import os
-
-# flake8: noqa
-# sys.path.append("..")
-print(sys.path)
-from scripts.evaluate import Diagnose_static, Diagnose_generative
-from scripts.io import ModelLoader
-# from src.scripts import evaluate
-
-
-"""
-"""
-
-
-"""
-Test the evaluate module
-"""
-
-
-@pytest.fixture
-def diagnose_static_instance():
-    return Diagnose_static()
-
-
-@pytest.fixture
-def diagnose_generative_instance():
-    return Diagnose_generative()
-
-
-@pytest.fixture
-def posterior_generative_sbi_model():
-    # create a temporary directory for the saved model
-    # dir = "savedmodels/sbi/"
-    # os.makedirs(dir)
-
-    # now save the model
-    low_bounds = torch.tensor([0, -10])
-    high_bounds = torch.tensor([10, 10])
-
-    prior = sbi.utils.BoxUniform(low=low_bounds, high=high_bounds)
-
-    posterior = sbi.inference.base.infer(
-        simulator, prior, "SNPE", num_simulations=10000
-    )
-
-    # Provide the posterior to the tests
-    yield prior, posterior
-
-    # Teardown: Remove the temporary directory and its contents
-    # shutil.rmtree(dataset_dir)
-
-
-@pytest.fixture
-def setup_plot_dir():
-    # create a temporary directory for the saved model
-    dir = "tests/plots/"
-    os.makedirs(dir)
-    yield dir
-
-
-def simulator(thetas):  # , percent_errors):
-    # convert to numpy array (if tensor):
-    thetas = np.atleast_2d(thetas)
-    # Check if the input has the correct shape
-    if thetas.shape[1] != 2:
-        raise ValueError(
-            "Input tensor must have shape (n, 2) \
-            where n is the number of parameter sets."
-        )
-
-    # Unpack the parameters
-    if thetas.shape[0] == 1:
-        # If there's only one set of parameters, extract them directly
-        m, b = thetas[0, 0], thetas[0, 1]
-    else:
-        # If there are multiple sets of parameters, extract them for each row
-        m, b = thetas[:, 0], thetas[:, 1]
-    x = np.linspace(0, 100, 101)
-    rs = np.random.RandomState()  # 2147483648)#
-    # I'm thinking sigma could actually be a function of x
-    # if we want to get fancy down the road
-    # Generate random noise (epsilon) based
-    # on a normal distribution with mean 0 and standard deviation sigma
-    sigma = 5
-    ε = rs.normal(loc=0, scale=sigma, size=(len(x), thetas.shape[0]))
-
-    # Initialize an empty array to store the results for each set of parameters
-    y = np.zeros((len(x), thetas.shape[0]))
-    for i in range(thetas.shape[0]):
-        m, b = thetas[i, 0], thetas[i, 1]
-        y[:, i] = m * x + b + ε[:, i]
-    return torch.Tensor(y.T)
-
-
-def test_generate_sbc_samples(
-    diagnose_generative_instance, posterior_generative_sbi_model
-):
-    # Mock data
-    # low_bounds = torch.tensor([0, -10])
-    # high_bounds = torch.tensor([10, 10])
-
-    # prior = sbi.utils.BoxUniform(low=low_bounds, high=high_bounds)
-    prior, posterior = posterior_generative_sbi_model
-    # inference_instance  # provide a mock posterior object
-    simulator_test = simulator  # provide a mock simulator function
-    num_sbc_runs = 1000
-    num_posterior_samples = 1000
-
-    # Generate SBC samples
-    thetas, ys, ranks, dap_samples = diagnose_generative_instance.generate_sbc_samples(
-        prior, posterior, simulator_test, num_sbc_runs, num_posterior_samples
-    )
-
-    # Add assertions based on the expected behavior of the method
-
-
-def test_run_all_sbc(
-    diagnose_generative_instance, posterior_generative_sbi_model, setup_plot_dir
-):
-    labels_list = ["$m$", "$b$"]
-    colorlist = ["#9C92A3", "#0F5257"]
-
-    prior, posterior = posterior_generative_sbi_model
-    simulator_test = simulator  # provide a mock simulator function
-
-    save_path = setup_plot_dir
-
-    diagnose_generative_instance.run_all_sbc(
-        prior,
-        posterior,
-        simulator_test,
-        labels_list,
-        colorlist,
-        num_sbc_runs=1_000,
-        num_posterior_samples=1_000,
-        samples_per_inference=1_000,
-        plot=False,
-        save=True,
-        path=save_path,
-    )
-    # Check if PDF files were saved
-    assert os.path.exists(save_path), f"No 'plots' folder found at {save_path}"
-
-    # List all files in the directory
-    files_in_directory = os.listdir(save_path)
-
-    # Check if at least one PDF file is present
-    pdf_files = [file for file in files_in_directory if file.endswith(".pdf")]
-    assert pdf_files, "No PDF files found in the 'plots' folder"
-
-    # We expect the pdfs to exist in the directory
-    expected_pdf_files = ["sbc_ranks.pdf", "sbc_ranks_cdf.pdf", "coverage.pdf"]
-    for expected_file in expected_pdf_files:
-        assert (
-            expected_file in pdf_files
-        ), f"Expected PDF file '{expected_file}' not found"
-
-
-"""
-def test_sbc_statistics(diagnose_instance):
-    # Mock data
-    ranks =  # provide mock ranks
-    thetas =  # provide mock thetas
-    dap_samples =  # provide mock dap_samples
-    num_posterior_samples = 1000
-
-    # Calculate SBC statistics
-    check_stats = diagnose_instance.sbc_statistics(
-        ranks, thetas, dap_samples, num_posterior_samples
-    )
-
-    # Add assertions based on the expected behavior of the method
-
-def test_plot_1d_ranks(diagnose_instance):
-    # Mock data
-    ranks =  # provide mock ranks
-    num_posterior_samples = 1000
-    labels_list =  # provide mock labels_list
-    colorlist =  # provide mock colorlist
-
-    # Plot 1D ranks
-    diagnose_instance.plot_1d_ranks(
-        ranks, num_posterior_samples, labels_list,
-        colorlist, plot=False, save=False
-    )
-"""

From 5315ef46e7f23e724aa2c7b46869e65d4188b849 Mon Sep 17 00:00:00 2001
From: voetberg <magpie127@gmail.com>
Date: Thu, 6 Jun 2024 16:54:55 -0500
Subject: [PATCH 4/5] Exclude tests from coverage report

---
 .github/workflows/test.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index fda3255..2224407 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -43,7 +43,7 @@ jobs:
       run: echo "PYTHONPATH=$(pwd):$(pwd)/src" >> ${{ runner.workspace }}/.env
     
     - name: Test with pytest
-      run: python -m poetry run pytest --cov
+      run: python -m poetry run pytest --cov=src/
       env:
         PYTHONPATH: ${{ env.PYTHONPATH }}
         ENV_FILE: ${{ runner.workspace }}/.env

From e4ca9b4c659b9d0031e9bd247c6bac5978475414 Mon Sep 17 00:00:00 2001
From: voetberg <magpie127@gmail.com>
Date: Fri, 7 Jun 2024 07:45:40 -0500
Subject: [PATCH 5/5] Removed flakey tests

---
 tests/test_metrics.py | 15 ---------------
 tests/test_plots.py   | 15 ---------------
 2 files changed, 30 deletions(-)

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 371d5f2..f04a39d 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -18,21 +18,6 @@ def metric_config(config_factory):
     return config
 
 
-def test_all_metrics_catalogued():
-    """Each metrics gets its own file, and each metric is included in the Metrics dictionary
-    so the client can use it.
-    This test verifies all metrics are cataloged"""
-
-    all_files = os.listdir("src/metrics/")
-    files_ignore = [
-        "metric.py",
-        "__init__.py",
-        "__pycache__",
-    ]  # All files not containing a metric
-    num_files = len([file for file in all_files if file not in files_ignore])
-    assert len(Metrics) == num_files
-
-
 def test_all_defaults(metric_config, mock_model, mock_data):
     """
     Ensures each metric has a default set of parameters and is included in the defaults list
diff --git a/tests/test_plots.py b/tests/test_plots.py
index 50e0d1f..677b9fa 100644
--- a/tests/test_plots.py
+++ b/tests/test_plots.py
@@ -18,21 +18,6 @@ def plot_config(config_factory):
     Config(config)
 
 
-def test_all_plot_catalogued():
-    """Each metrics gets its own file, and each metric is included in the Metrics dictionary
-    so the client can use it.
-    This test verifies all metrics are cataloged"""
-
-    all_files = os.listdir("src/plots/")
-    files_ignore = [
-        "plot.py",
-        "__init__.py",
-        "__pycache__",
-    ]  # All files not containing a metric
-    num_files = len([file for file in all_files if file not in files_ignore])
-    assert len(Plots) == num_files
-
-
 def test_all_defaults(plot_config, mock_model, mock_data):
     """
     Ensures each metric has a default set of parameters and is included in the defaults list