Merge pull request #69 from voetberg/test_fix

Correct CICD so tests pass when they should
deepskies · Jun 11, 2024 · commit 4cd252c
2 parents: 3f67b67 + 77b3b39

Showing 13 changed files with 352 additions and 426 deletions.
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -32,6 +32,9 @@ jobs:
     - name: View poetry --help
       run: poetry --help
 
+    - name: Update lockfile
+      run: python -m poetry lock
+
     - name: Install dependencies
       shell: bash
       run: python -m poetry install
@@ -40,7 +43,7 @@ jobs:
       run: echo "PYTHONPATH=$(pwd):$(pwd)/src" >> ${{ runner.workspace }}/.env
 
     - name: Test with pytest
-      run: python -m poetry run pytest --cov
+      run: python -m poetry run pytest --cov=src/
       env:
         PYTHONPATH: ${{ env.PYTHONPATH }}
         ENV_FILE: ${{ runner.workspace }}/.env
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -1,3 +1,7 @@
+import sys
+
+sys.path.append("../src")
+
 # Configuration file for the Sphinx documentation builder.
 #
 # For the full list of built-in configuration values, see the documentation:
@@ -6,17 +10,14 @@
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
-project = 'DeepDiagnostics'
-copyright = '2024, Becky Nevin, M Voetberg, Brian Nord'
-author = 'Becky Nevin, M Voetberg, Brian Nord'
-release = '0.1.0'
+project = "DeepDiagnostics"
+copyright = "2024, Becky Nevin, M Voetberg, Brian Nord"
+author = "Becky Nevin, M Voetberg, Brian Nord"
+release = "0.1.0"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
 
-import sys
-sys.path.append("../src")
-
 extensions = [
     "sphinx.ext.autodoc",
     "sphinx.ext.autosummary",
@@ -34,5 +35,5 @@
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
-html_theme = 'alabaster'
-html_static_path = ['_static']
+html_theme = "alabaster"
+html_static_path = ["_static"]
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,7 @@ numpy = "^1.26.4"
 matplotlib = "^3.8.3"
 tarp = "^0.1.1"
 deprecation = "^2.1.0"
+scipy = "1.12.0"
 
 
 [tool.poetry.group.dev.dependencies]

diff --git a/src/data/data.py b/src/data/data.py
@@ -68,7 +68,7 @@ def read_prior(self):
         raise NotImplementedError
 
     def load_prior(self, prior, prior_kwargs):
-        if prior is None: 
+        if prior is None:
             prior = get_item("data", "prior", raise_exception=False)
         try:
             prior = self.read_prior()

diff --git a/src/data/simulator.py b/src/data/simulator.py
@@ -1,4 +1,3 @@
-from typing import Any
 import numpy as np
 from abc import abstractmethod, ABC
 

diff --git a/src/metrics/local_two_sample.py b/src/metrics/local_two_sample.py
@@ -1,6 +1,7 @@
 from typing import Any, Optional, Sequence, Union
 import numpy as np 
 
+
 from sklearn.model_selection import KFold
 from sklearn.neural_network import MLPClassifier
 from sklearn.utils import shuffle
@@ -31,15 +32,16 @@ def __init__(
             number_simulations
         )
 
-    def _collect_data_params(self):
 
+    def _collect_data_params(self):
         # P is the prior and x_P is generated via the simulator from the parameters P.
         self.p = self.data.sample_prior(self.number_simulations)
         self.q = np.zeros_like(self.p)
 
         context_size = self.data.true_context().shape[-1]
         self.outcome_given_p = np.zeros(
             (self.number_simulations, context_size)
+
         )
         self.outcome_given_q = np.zeros_like(self.outcome_given_p)
         self.evaluation_context = np.zeros_like(self.outcome_given_p)
@@ -48,29 +50,35 @@ def _collect_data_params(self):
             context = self.data.simulator.generate_context(context_size)
             self.outcome_given_p[index] = self.data.simulator.simulate(p, context)
             # Q is the approximate posterior amortized in x
-            q =  self.model.sample_posterior(1, context).ravel()
+            q = self.model.sample_posterior(1, context).ravel()
             self.q[index] = q
             self.outcome_given_q[index] = self.data.simulator.simulate(q, context)
 
-        self.evaluation_context = np.array([self.data.simulator.generate_context(context_size) for _ in range(self.number_simulations)])
+        self.evaluation_context = np.array(
+            [
+                self.data.simulator.generate_context(context_size)
+                for _ in range(self.number_simulations)
+            ]
+        )
 
-    def train_linear_classifier(self, p, q, x_p, x_q, classifier:str, classifier_kwargs:dict={}): 
-        classifier_map = {
-            "MLP":MLPClassifier
-        }
-        try: 
+    def train_linear_classifier(
+        self, p, q, x_p, x_q, classifier: str, classifier_kwargs: dict = {}
+    ):
+        classifier_map = {"MLP": MLPClassifier}
+        try:
             classifier = classifier_map[classifier](**classifier_kwargs)
-        except KeyError: 
+        except KeyError:
             raise NotImplementedError(
-                f"{classifier} not implemented, choose from {list(classifier_map.keys())}.")
+                f"{classifier} not implemented, choose from {list(classifier_map.keys())}."
+            )
 
         joint_P_x = np.concatenate([p, x_p], axis=1)
         joint_Q_x = np.concatenate([q, x_q], axis=1)
 
         features = np.concatenate([joint_P_x, joint_Q_x], axis=0)
         labels = np.concatenate(
             [np.array([0] * len(joint_P_x)), np.array([1] * len(joint_Q_x))]
-        ).ravel() 
+        ).ravel()
 
         # shuffle features and labels
         features, labels = shuffle(features, labels)
@@ -79,44 +87,73 @@ def train_linear_classifier(self, p, q, x_p, x_q, classifier:str, classifier_kwa
         classifier.fit(X=features, y=labels)
         return classifier
 
-    def _eval_model(self, P, evaluation_sample, classifier): 
+    def _eval_model(self, P, evaluation_sample, classifier):
         evaluation = np.concatenate([P, evaluation_sample], axis=1)
         probability = classifier.predict_proba(evaluation)[:, 0]
-        return probability 
-
-    def _scores(self, p, q, x_p, x_q, classifier, cross_evaluate: bool=True, classifier_kwargs=None): 
+        return probability
+
+    def _scores(
+        self,
+        p,
+        q,
+        x_p,
+        x_q,
+        classifier,
+        cross_evaluate: bool = True,
+        classifier_kwargs=None,
+    ):
         model_probabilities = []
-        for model, model_args in zip(classifier, classifier_kwargs): 
-            if cross_evaluate: 
-                model_probabilities.append(self._cross_eval_score(p, q, x_p, x_q, model, model_args))
-            else: 
-                trained_model = self.train_linear_classifier(p, q, x_p, x_q, model, model_args)
-                model_probabilities.append(self._eval_model(P=p, classifier=trained_model))
+        for model, model_args in zip(classifier, classifier_kwargs):
+            if cross_evaluate:
+                model_probabilities.append(
+                    self._cross_eval_score(p, q, x_p, x_q, model, model_args)
+                )
+            else:
+                trained_model = self.train_linear_classifier(
+                    p, q, x_p, x_q, model, model_args
+                )
+                model_probabilities.append(
+                    self._eval_model(P=p, classifier=trained_model)
+                )
 
         return np.mean(model_probabilities, axis=0)
 
-    def _cross_eval_score(self, p, q, x_p, x_q, classifier, classifier_kwargs, n_cross_folds=5): 
-        kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42) # Getting the shape
+    def _cross_eval_score(
+        self, p, q, x_p, x_q, classifier, classifier_kwargs, n_cross_folds=5
+    ):
+        kf = KFold(
+            n_splits=n_cross_folds, shuffle=True, random_state=42
+        )  # Getting the shape
         cv_splits = kf.split(p)
         # train classifiers over cv-folds
         probabilities = []
-        self.evaluation_data = np.zeros((n_cross_folds, len(next(cv_splits)[1]), self.evaluation_context.shape[-1]))
+        self.evaluation_data = np.zeros(
+            (n_cross_folds, len(next(cv_splits)[1]), self.evaluation_context.shape[-1])
+        )
         self.prior_evaluation = np.zeros_like(p)
-        
-        kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42) 
+
+        kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42)
         cv_splits = kf.split(p)
         for cross_trial, (train_index, val_index) in enumerate(cv_splits):
             # get train split
-            p_train, x_p_train = p[train_index,:], x_p[train_index,:]
-            q_train, x_q_train = q[train_index,:], x_q[train_index,:]
-            trained_nth_classifier = self.train_linear_classifier(p_train, q_train, x_p_train, x_q_train, classifier, classifier_kwargs)
+            p_train, x_p_train = p[train_index, :], x_p[train_index, :]
+            q_train, x_q_train = q[train_index, :], x_q[train_index, :]
+            trained_nth_classifier = self.train_linear_classifier(
+                p_train, q_train, x_p_train, x_q_train, classifier, classifier_kwargs
+            )
             p_evaluate = p[val_index]
-            for index, p_validation in enumerate(p_evaluate): 
+            for index, p_validation in enumerate(p_evaluate):
                 self.evaluation_data[cross_trial][index] = self.data.simulator.simulate(
                     p_validation, self.evaluation_context[val_index][index]
                 )
             self.prior_evaluation[index] = p_validation
-            probabilities.append(self._eval_model(p_evaluate, self.evaluation_data[cross_trial], trained_nth_classifier))
+            probabilities.append(
+                self._eval_model(
+                    p_evaluate,
+                    self.evaluation_data[cross_trial],
+                    trained_nth_classifier,
+                )
+            )
         return probabilities
 
     def permute_data(self, P, Q):
@@ -130,68 +167,68 @@ def permute_data(self, P, Q):
         X = np.concatenate([P, Q], axis=0)
         X_perm = X[self.data.rng.permutation(np.arange(n_samples * 2))]
         return X_perm[:n_samples], X_perm[n_samples:]
-
-    def calculate(
-            self, 
-            linear_classifier:Union[str, list[str]]='MLP', 
-            cross_evaluate:bool=True, 
-            n_null_hypothesis_trials=100, 
-            classifier_kwargs:Union[dict, list[dict]]=None
-        ):
 
-        if isinstance(linear_classifier, str): 
+    def calculate(
+        self,
+        linear_classifier: Union[str, list[str]] = "MLP",
+        cross_evaluate: bool = True,
+        n_null_hypothesis_trials=100,
+        classifier_kwargs: Union[dict, list[dict]] = None,
+    ):
+        if isinstance(linear_classifier, str):
             linear_classifier = [linear_classifier]
 
-        if classifier_kwargs is None: 
+        if classifier_kwargs is None:
             classifier_kwargs = {}
-        if isinstance(classifier_kwargs, dict): 
+        if isinstance(classifier_kwargs, dict):
             classifier_kwargs = [classifier_kwargs]
 
         probabilities = self._scores(
-            self.p, 
-            self.q, 
-            self.outcome_given_p, 
-            self.outcome_given_q, 
-            classifier=linear_classifier, 
-            cross_evaluate=cross_evaluate, 
-            classifier_kwargs=classifier_kwargs
+            self.p,
+            self.q,
+            self.outcome_given_p,
+            self.outcome_given_q,
+            classifier=linear_classifier,
+            cross_evaluate=cross_evaluate,
+            classifier_kwargs=classifier_kwargs,
         )
         null_hypothesis_probabilities = []
-        for _ in range(n_null_hypothesis_trials): 
+        for _ in range(n_null_hypothesis_trials):
             joint_P_x = np.concatenate([self.p, self.outcome_given_p], axis=1)
             joint_Q_x = np.concatenate([self.q, self.outcome_given_q], axis=1)
             joint_P_x_perm, joint_Q_x_perm = self.permute_data(
-                joint_P_x, joint_Q_x,
+                joint_P_x,
+                joint_Q_x,
             )
             p_null = joint_P_x_perm[:, : self.p.shape[-1]]
             p_given_x_null = joint_P_x_perm[:, self.p.shape[-1] :]
             q_null = joint_Q_x_perm[:, : self.q.shape[-1]]
             q_given_x_null = joint_Q_x_perm[:, self.q.shape[-1] :]
 
             null_result = self._scores(
-                p_null, 
-                q_null, 
-                p_given_x_null, 
-                q_given_x_null, 
-                classifier=linear_classifier, 
-                cross_evaluate=cross_evaluate, 
-                classifier_kwargs=classifier_kwargs
+                p_null,
+                q_null,
+                p_given_x_null,
+                q_given_x_null,
+                classifier=linear_classifier,
+                cross_evaluate=cross_evaluate,
+                classifier_kwargs=classifier_kwargs,
             )
 
             null_hypothesis_probabilities.append(null_result)
-        
-        null =  np.array(null_hypothesis_probabilities)
+
+        null = np.array(null_hypothesis_probabilities)
         self.output = {
             "lc2st_probabilities": probabilities.tolist(), 
             "lc2st_null_hypothesis_probabilities": null.tolist()
         }
         return probabilities, null
-    
+
     def __call__(self, **kwds: Any) -> Any:
-        try: 
+        try:
             self._collect_data_params()
-        except NotImplementedError: 
-            pass 
+        except NotImplementedError:
+            pass
 
         self.calculate(**kwds)
-        self._finish()
+        self._finish()