Skip to content

Commit

Permalink
Merge pull request #69 from voetberg/test_fix
Browse files Browse the repository at this point in the history
Correct the CI/CD configuration so tests pass when they should
  • Loading branch information
bnord committed Jun 11, 2024
2 parents 3f67b67 + 77b3b39 commit 4cd252c
Show file tree
Hide file tree
Showing 13 changed files with 352 additions and 426 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ jobs:
- name: View poetry --help
run: poetry --help

- name: Update lockfile
run: python -m poetry lock

- name: Install dependencies
shell: bash
run: python -m poetry install
Expand All @@ -40,7 +43,7 @@ jobs:
run: echo "PYTHONPATH=$(pwd):$(pwd)/src" >> ${{ runner.workspace }}/.env

- name: Test with pytest
run: python -m poetry run pytest --cov
run: python -m poetry run pytest --cov=src/
env:
PYTHONPATH: ${{ env.PYTHONPATH }}
ENV_FILE: ${{ runner.workspace }}/.env
19 changes: 10 additions & 9 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import sys

sys.path.append("../src")

# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
Expand All @@ -6,17 +10,14 @@
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'DeepDiagnostics'
copyright = '2024, Becky Nevin, M Voetberg, Brian Nord'
author = 'Becky Nevin, M Voetberg, Brian Nord'
release = '0.1.0'
project = "DeepDiagnostics"
copyright = "2024, Becky Nevin, M Voetberg, Brian Nord"
author = "Becky Nevin, M Voetberg, Brian Nord"
release = "0.1.0"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

import sys
sys.path.append("../src")

extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
Expand All @@ -34,5 +35,5 @@
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'alabaster'
html_static_path = ['_static']
html_theme = "alabaster"
html_static_path = ["_static"]
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ numpy = "^1.26.4"
matplotlib = "^3.8.3"
tarp = "^0.1.1"
deprecation = "^2.1.0"
scipy = "1.12.0"


[tool.poetry.group.dev.dependencies]
Expand Down
2 changes: 1 addition & 1 deletion src/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def read_prior(self):
raise NotImplementedError

def load_prior(self, prior, prior_kwargs):
if prior is None:
if prior is None:
prior = get_item("data", "prior", raise_exception=False)
try:
prior = self.read_prior()
Expand Down
1 change: 0 additions & 1 deletion src/data/simulator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from typing import Any
import numpy as np
from abc import abstractmethod, ABC

Expand Down
167 changes: 102 additions & 65 deletions src/metrics/local_two_sample.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any, Optional, Sequence, Union
import numpy as np


from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle
Expand Down Expand Up @@ -31,15 +32,16 @@ def __init__(
number_simulations
)

def _collect_data_params(self):

def _collect_data_params(self):
# P is the prior and x_P is generated via the simulator from the parameters P.
self.p = self.data.sample_prior(self.number_simulations)
self.q = np.zeros_like(self.p)

context_size = self.data.true_context().shape[-1]
self.outcome_given_p = np.zeros(
(self.number_simulations, context_size)

)
self.outcome_given_q = np.zeros_like(self.outcome_given_p)
self.evaluation_context = np.zeros_like(self.outcome_given_p)
Expand All @@ -48,29 +50,35 @@ def _collect_data_params(self):
context = self.data.simulator.generate_context(context_size)
self.outcome_given_p[index] = self.data.simulator.simulate(p, context)
# Q is the approximate posterior amortized in x
q = self.model.sample_posterior(1, context).ravel()
q = self.model.sample_posterior(1, context).ravel()
self.q[index] = q
self.outcome_given_q[index] = self.data.simulator.simulate(q, context)

self.evaluation_context = np.array([self.data.simulator.generate_context(context_size) for _ in range(self.number_simulations)])
self.evaluation_context = np.array(
[
self.data.simulator.generate_context(context_size)
for _ in range(self.num_simulations)
]
)

def train_linear_classifier(self, p, q, x_p, x_q, classifier:str, classifier_kwargs:dict={}):
classifier_map = {
"MLP":MLPClassifier
}
try:
def train_linear_classifier(
self, p, q, x_p, x_q, classifier: str, classifier_kwargs: dict = {}
):
classifier_map = {"MLP": MLPClassifier}
try:
classifier = classifier_map[classifier](**classifier_kwargs)
except KeyError:
except KeyError:
raise NotImplementedError(
f"{classifier} not implemented, choose from {list(classifier_map.keys())}.")
f"{classifier} not implemented, choose from {list(classifier_map.keys())}."
)

joint_P_x = np.concatenate([p, x_p], axis=1)
joint_Q_x = np.concatenate([q, x_q], axis=1)

features = np.concatenate([joint_P_x, joint_Q_x], axis=0)
labels = np.concatenate(
[np.array([0] * len(joint_P_x)), np.array([1] * len(joint_Q_x))]
).ravel()
).ravel()

# shuffle features and labels
features, labels = shuffle(features, labels)
Expand All @@ -79,44 +87,73 @@ def train_linear_classifier(self, p, q, x_p, x_q, classifier:str, classifier_kwa
classifier.fit(X=features, y=labels)
return classifier

def _eval_model(self, P, evaluation_sample, classifier):
def _eval_model(self, P, evaluation_sample, classifier):
evaluation = np.concatenate([P, evaluation_sample], axis=1)
probability = classifier.predict_proba(evaluation)[:, 0]
return probability

def _scores(self, p, q, x_p, x_q, classifier, cross_evaluate: bool=True, classifier_kwargs=None):
return probability

def _scores(
self,
p,
q,
x_p,
x_q,
classifier,
cross_evaluate: bool = True,
classifier_kwargs=None,
):
model_probabilities = []
for model, model_args in zip(classifier, classifier_kwargs):
if cross_evaluate:
model_probabilities.append(self._cross_eval_score(p, q, x_p, x_q, model, model_args))
else:
trained_model = self.train_linear_classifier(p, q, x_p, x_q, model, model_args)
model_probabilities.append(self._eval_model(P=p, classifier=trained_model))
for model, model_args in zip(classifier, classifier_kwargs):
if cross_evaluate:
model_probabilities.append(
self._cross_eval_score(p, q, x_p, x_q, model, model_args)
)
else:
trained_model = self.train_linear_classifier(
p, q, x_p, x_q, model, model_args
)
model_probabilities.append(
self._eval_model(P=p, classifier=trained_model)
)

return np.mean(model_probabilities, axis=0)

def _cross_eval_score(self, p, q, x_p, x_q, classifier, classifier_kwargs, n_cross_folds=5):
kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42) # Getting the shape
def _cross_eval_score(
self, p, q, x_p, x_q, classifier, classifier_kwargs, n_cross_folds=5
):
kf = KFold(
n_splits=n_cross_folds, shuffle=True, random_state=42
) # Getting the shape
cv_splits = kf.split(p)
# train classifiers over cv-folds
probabilities = []
self.evaluation_data = np.zeros((n_cross_folds, len(next(cv_splits)[1]), self.evaluation_context.shape[-1]))
self.evaluation_data = np.zeros(
(n_cross_folds, len(next(cv_splits)[1]), self.evaluation_context.shape[-1])
)
self.prior_evaluation = np.zeros_like(p)
kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42)

kf = KFold(n_splits=n_cross_folds, shuffle=True, random_state=42)
cv_splits = kf.split(p)
for cross_trial, (train_index, val_index) in enumerate(cv_splits):
# get train split
p_train, x_p_train = p[train_index,:], x_p[train_index,:]
q_train, x_q_train = q[train_index,:], x_q[train_index,:]
trained_nth_classifier = self.train_linear_classifier(p_train, q_train, x_p_train, x_q_train, classifier, classifier_kwargs)
p_train, x_p_train = p[train_index, :], x_p[train_index, :]
q_train, x_q_train = q[train_index, :], x_q[train_index, :]
trained_nth_classifier = self.train_linear_classifier(
p_train, q_train, x_p_train, x_q_train, classifier, classifier_kwargs
)
p_evaluate = p[val_index]
for index, p_validation in enumerate(p_evaluate):
for index, p_validation in enumerate(p_evaluate):
self.evaluation_data[cross_trial][index] = self.data.simulator.simulate(
p_validation, self.evaluation_context[val_index][index]
)
self.prior_evaluation[index] = p_validation
probabilities.append(self._eval_model(p_evaluate, self.evaluation_data[cross_trial], trained_nth_classifier))
probabilities.append(
self._eval_model(
p_evaluate,
self.evaluation_data[cross_trial],
trained_nth_classifier,
)
)
return probabilities

def permute_data(self, P, Q):
Expand All @@ -130,68 +167,68 @@ def permute_data(self, P, Q):
X = np.concatenate([P, Q], axis=0)
X_perm = X[self.data.rng.permutation(np.arange(n_samples * 2))]
return X_perm[:n_samples], X_perm[n_samples:]

def calculate(
self,
linear_classifier:Union[str, list[str]]='MLP',
cross_evaluate:bool=True,
n_null_hypothesis_trials=100,
classifier_kwargs:Union[dict, list[dict]]=None
):

if isinstance(linear_classifier, str):
def calculate(
self,
linear_classifier: Union[str, list[str]] = "MLP",
cross_evaluate: bool = True,
n_null_hypothesis_trials=100,
classifier_kwargs: Union[dict, list[dict]] = None,
):
if isinstance(linear_classifier, str):
linear_classifier = [linear_classifier]

if classifier_kwargs is None:
if classifier_kwargs is None:
classifier_kwargs = {}
if isinstance(classifier_kwargs, dict):
if isinstance(classifier_kwargs, dict):
classifier_kwargs = [classifier_kwargs]

probabilities = self._scores(
self.p,
self.q,
self.outcome_given_p,
self.outcome_given_q,
classifier=linear_classifier,
cross_evaluate=cross_evaluate,
classifier_kwargs=classifier_kwargs
self.p,
self.q,
self.outcome_given_p,
self.outcome_given_q,
classifier=linear_classifier,
cross_evaluate=cross_evaluate,
classifier_kwargs=classifier_kwargs,
)
null_hypothesis_probabilities = []
for _ in range(n_null_hypothesis_trials):
for _ in range(n_null_hypothesis_trials):
joint_P_x = np.concatenate([self.p, self.outcome_given_p], axis=1)
joint_Q_x = np.concatenate([self.q, self.outcome_given_q], axis=1)
joint_P_x_perm, joint_Q_x_perm = self.permute_data(
joint_P_x, joint_Q_x,
joint_P_x,
joint_Q_x,
)
p_null = joint_P_x_perm[:, : self.p.shape[-1]]
p_given_x_null = joint_P_x_perm[:, self.p.shape[-1] :]
q_null = joint_Q_x_perm[:, : self.q.shape[-1]]
q_given_x_null = joint_Q_x_perm[:, self.q.shape[-1] :]

null_result = self._scores(
p_null,
q_null,
p_given_x_null,
q_given_x_null,
classifier=linear_classifier,
cross_evaluate=cross_evaluate,
classifier_kwargs=classifier_kwargs
p_null,
q_null,
p_given_x_null,
q_given_x_null,
classifier=linear_classifier,
cross_evaluate=cross_evaluate,
classifier_kwargs=classifier_kwargs,
)

null_hypothesis_probabilities.append(null_result)
null = np.array(null_hypothesis_probabilities)

null = np.array(null_hypothesis_probabilities)
self.output = {
"lc2st_probabilities": probabilities.tolist(),
"lc2st_null_hypothesis_probabilities": null.tolist()
}
return probabilities, null

def __call__(self, **kwds: Any) -> Any:
try:
try:
self._collect_data_params()
except NotImplementedError:
pass
except NotImplementedError:
pass

self.calculate(**kwds)
self._finish()
self._finish()
Loading

0 comments on commit 4cd252c

Please sign in to comment.