Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Jul 15, 2024
1 parent 944de9f commit 9e164e4
Show file tree
Hide file tree
Showing 14 changed files with 758 additions and 247 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/test.yaml.rej
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
diff a/.github/workflows/test.yaml b/.github/workflows/test.yaml (rejected hunks)
@@ -1,53 +1,67 @@
name: Test

on:
- push:
- branches: [main]
Expand All @@ -13,13 +13,13 @@ diff a/.github/workflows/test.yaml b/.github/workflows/test.yaml (rejected hunks
+ branches: [main]
+ schedule:
+ - cron: "0 5 1,15 * *"

concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
- cancel-in-progress: true
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true

jobs:
- test:
- runs-on: ${{ matrix.os }}
Expand All @@ -37,7 +37,7 @@ diff a/.github/workflows/test.yaml b/.github/workflows/test.yaml (rejected hunks
+ defaults:
+ run:
+ shell: bash -e {0} # -e to fail on error

+ strategy:
+ fail-fast: false
+ matrix:
Expand Down
6 changes: 3 additions & 3 deletions README.md.rej
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
diff a/README.md b/README.md (rejected hunks)
@@ -17,7 +17,7 @@ Please refer to the [documentation][link-docs]. In particular, the

## Installation

-You need to have Python 3.8 or newer installed on your system. If you don't have
+You need to have Python 3.10 or newer installed on your system. If you don't have
Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge).

There are several alternative options to install sobolev_alignment:
10 changes: 5 additions & 5 deletions docs/conf.py.rej
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ diff a/docs/conf.py b/docs/conf.py (rejected hunks)
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
@@ -36,10 +36,10 @@ needs_sphinx = "4.0"

html_context = {
"display_github": True, # Integrate GitHub
- "github_user": "saroudant", # Username
Expand All @@ -19,7 +19,7 @@ diff a/docs/conf.py b/docs/conf.py (rejected hunks)
+ "github_version": "main",
+ "conf_py_path": "/docs/",
}

# -- General configuration ---------------------------------------------------
@@ -57,6 +57,7 @@ extensions = [
"sphinx_autodoc_typehints",
Expand All @@ -28,22 +28,22 @@ diff a/docs/conf.py b/docs/conf.py (rejected hunks)
+ "sphinxext.opengraph",
*[p.stem for p in (HERE / "extensions").glob("*.py")],
]

@@ -108,12 +109,15 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
#
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]
+html_css_files = ["css/custom.css"]
+
html_title = project_name

html_theme_options = {
"repository_url": repository_url,
"use_repository_button": True,
"path_to_docs": "docs/",
+ "navigation_with_keys": False,
}

pygments_style = "default"
@@ -123,18 +127,3 @@ nitpick_ignore = [
# you can add an exception to this list.
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml.rej
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ diff a/pyproject.toml b/pyproject.toml (rejected hunks)
- "session-info"
+ "session-info",
]

[project.optional-dependencies]
dev = [
- # CLI for bumping the version number
Expand Down Expand Up @@ -44,5 +44,5 @@ diff a/pyproject.toml b/pyproject.toml (rejected hunks)
- "pytest-cov",
+ "coverage",
]

[tool.coverage.run]
33 changes: 26 additions & 7 deletions sobolev_alignment/feature_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ def higher_order_contribution(

# Compute features by iterating over possible combinations
logging.info("\t START FEATURES")
combinations_features = Parallel(n_jobs=n_jobs, verbose=1, max_nbytes=1e6, pre_dispatch=int(1.5 * n_jobs))(
combinations_features = Parallel(
n_jobs=n_jobs, verbose=1, max_nbytes=1e6, pre_dispatch=int(1.5 * n_jobs)
)(
delayed(combinatorial_product)(sparse_data, x, gamma)
for x in combinations_with_replacement(np.arange(sparse_data.shape[1]), r=d)
)
Expand All @@ -98,10 +100,18 @@ def higher_order_contribution(
# Return names of each features.
logging.info("\t\t FIND NAMES")
combinations_names = Parallel(
n_jobs=min(5, n_jobs), verbose=1, max_nbytes=1e4, pre_dispatch=int(1.5 * min(5, n_jobs))
)(delayed(_interaction_name)(x) for x in combinations_with_replacement(gene_names, r=d))
n_jobs=min(5, n_jobs),
verbose=1,
max_nbytes=1e4,
pre_dispatch=int(1.5 * min(5, n_jobs)),
)(
delayed(_interaction_name)(x)
for x in combinations_with_replacement(gene_names, r=d)
)

return pd.DataFrame.sparse.from_spmatrix(data=combinations_features, columns=combinations_names)
return pd.DataFrame.sparse.from_spmatrix(
data=combinations_features, columns=combinations_names
)


def _combination_to_idx(idx, p):
Expand Down Expand Up @@ -177,20 +187,29 @@ def combinatorial_product(x, idx, gamma):
Values of the higher order feature.
"""
# Iterate over all genes and compute the feature weight by multiplication
prod = [basis(x[:, i], k, gamma) for i, k in enumerate(_combination_to_idx(idx, x.shape[1])) if k > 0]
prod = [
basis(x[:, i], k, gamma)
for i, k in enumerate(_combination_to_idx(idx, x.shape[1]))
if k > 0
]
if len(prod) == 0:
return 1

return reduce(scipy.sparse.csc_matrix.multiply, prod)


def _interaction_name(gene_combi):
combin_name = [f"{g}^{r}" for g, r in zip(*np.unique(gene_combi, return_counts=True))]
combin_name = [
f"{g}^{r}" for g, r in zip(*np.unique(gene_combi, return_counts=True))
]
return "*".join(combin_name) if len(combin_name) > 0 else "1"


def _higher_order_interaction_wrapper(data, x, gamma, gene_names):
return [combinatorial_product(data, x, gamma), _interaction_name(gene_names, _combination_to_idx(x, data.shape[1]))]
return [
combinatorial_product(data, x, gamma),
_interaction_name(gene_names, _combination_to_idx(x, data.shape[1])),
]


def _compute_offset(data, gamma):
Expand Down
20 changes: 16 additions & 4 deletions sobolev_alignment/generate_artificial_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ def generate_samples(
batch_name_ids = [batch_key_dict[n] for n in batch_names]
batch_name_ids = torch.Tensor(np.array(batch_name_ids).reshape(-1, 1))
# Recover log library size (exponential)
lib_size_samples = np.array([np.random.choice(lib_size[n], 1)[0] for n in batch_names])
lib_size_samples = np.array(
[np.random.choice(lib_size[n], 1)[0] for n in batch_names]
)
lib_size_samples = np.log(lib_size_samples)
else:
batch_name_ids = None
Expand All @@ -82,7 +84,11 @@ def generate_samples(
cont_covs = torch.Tensor(covariates_values)

# Generate random noise
z = torch.Tensor(np.random.normal(size=(int(sample_size), model.init_params_["non_kwargs"]["n_latent"])))
z = torch.Tensor(
np.random.normal(
size=(int(sample_size), model.init_params_["non_kwargs"]["n_latent"])
)
)
dist_param_samples = model.module.generative(
z=z,
library=torch.Tensor(np.array(lib_size_samples).reshape(-1, 1)),
Expand Down Expand Up @@ -156,8 +162,14 @@ def parallel_generate_samples(
results = Parallel(n_jobs=n_jobs, verbose=1)(
delayed(generate_samples)(
sample_size=batch_size,
batch_names=batch_names[i : i + batch_size] if batch_names is not None else None,
covariates_values=covariates_values[i : i + batch_size] if covariates_values is not None else None,
batch_names=(
batch_names[i : i + batch_size] if batch_names is not None else None
),
covariates_values=(
covariates_values[i : i + batch_size]
if covariates_values is not None
else None
),
lib_size=lib_size,
model=model,
batch_key_dict=batch_key_dict,
Expand Down
22 changes: 17 additions & 5 deletions sobolev_alignment/interpolated_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import scipy


def compute_optimal_tau(PV_number, pv_projections, principal_angles, n_interpolation=100):
def compute_optimal_tau(
PV_number, pv_projections, principal_angles, n_interpolation=100
):
"""Compute the optimal interpolation step for each PV (Grassmann interpolation)."""
ks_statistics = {}
for tau_step in np.linspace(0, 1, n_interpolation + 1):
Expand All @@ -25,12 +27,22 @@ def compute_optimal_tau(PV_number, pv_projections, principal_angles, n_interpola

def project_on_interpolate_PV(angle, PV_number, tau_step, pv_projections):
"""Project data on interpolated PVs."""
source_proj = np.sin((1 - tau_step) * angle) * pv_projections["source"]["source"][:, PV_number]
source_proj += np.sin(tau_step * angle) * pv_projections["target"]["source"][:, PV_number]
source_proj = (
np.sin((1 - tau_step) * angle)
* pv_projections["source"]["source"][:, PV_number]
)
source_proj += (
np.sin(tau_step * angle) * pv_projections["target"]["source"][:, PV_number]
)
source_proj /= np.sin(angle)

target_proj = np.sin((1 - tau_step) * angle) * pv_projections["source"]["target"][:, PV_number]
target_proj += np.sin(tau_step * angle) * pv_projections["target"]["target"][:, PV_number]
target_proj = (
np.sin((1 - tau_step) * angle)
* pv_projections["source"]["target"][:, PV_number]
)
target_proj += (
np.sin(tau_step * angle) * pv_projections["target"]["target"][:, PV_number]
)
target_proj /= np.sin(angle)

return source_proj, target_proj
60 changes: 46 additions & 14 deletions sobolev_alignment/krr_approx.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@
FALKON_IMPORTED = True
except ImportError:
FALKON_IMPORTED = False
print("FALKON NOT INSTALLED, OR NOT IMPORTED. USING FALKON WOULD RESULT IN BETTER PERFORMANCE.", flush=True)
print(
"FALKON NOT INSTALLED, OR NOT IMPORTED. USING FALKON WOULD RESULT IN BETTER PERFORMANCE.",
flush=True,
)
from sklearn.gaussian_process.kernels import Matern, PairwiseKernel
from sklearn.kernel_ridge import KernelRidge

Expand Down Expand Up @@ -133,7 +136,11 @@ def __init__(

# Set kernel
self.kernel = kernel
self.kernel_params = kernel_params if kernel_params else self.default_kernel_params[self.method][self.kernel]
self.kernel_params = (
kernel_params
if kernel_params
else self.default_kernel_params[self.method][self.kernel]
)
self._make_kernel()

# Set penalization parameters
Expand All @@ -147,7 +154,9 @@ def __init__(
# Preprocessing
self.mean_center = mean_center
self.unit_std = unit_std
self.pre_process_ = StandardScaler(with_mean=mean_center, with_std=unit_std, copy=False)
self.pre_process_ = StandardScaler(
with_mean=mean_center, with_std=unit_std, copy=False
)

def _make_kernel(self):
"""
Expand All @@ -160,17 +169,23 @@ def _make_kernel(self):
# scikit-learn initialization
if self.method.lower() == "sklearn":
if self.sklearn_kernel[self.kernel.lower()] != "wrapper":
self.kernel_ = self.sklearn_kernel[self.kernel.lower()](**self.kernel_params)
self.kernel_ = self.sklearn_kernel[self.kernel.lower()](
**self.kernel_params
)
else:
self.kernel_ = PairwiseKernel(metric=self.kernel.lower(), **self.kernel_params)
self.kernel_ = PairwiseKernel(
metric=self.kernel.lower(), **self.kernel_params
)

# Falkon
elif self.method.lower() == "falkon":
self.kernel_ = self.falkon_kernel[self.kernel.lower()](**self.kernel_params)

# If not implemented
else:
raise NotImplementedError("%s not implemented. Choices: sklearn and falkon" % (self.method))
raise NotImplementedError(
"%s not implemented. Choices: sklearn and falkon" % (self.method)
)

return True

Expand All @@ -197,7 +212,9 @@ def fit(self, X: torch.Tensor, y: torch.Tensor):
# are False as it can have a large memory footprint.
if self.mean_center or self.unit_std:
self.pre_process_.fit(X)
self.training_data_ = torch.Tensor(self.pre_process_.transform(torch.Tensor(X)))
self.training_data_ = torch.Tensor(
self.pre_process_.transform(torch.Tensor(X))
)
else:
self.training_data_ = X

Expand Down Expand Up @@ -296,7 +313,9 @@ def transform(self, X: torch.Tensor):
elif self.method == "falkon":
return self.ridge_clf_.predict(X)
else:
raise NotImplementedError("%s not implemented. Choices: sklearn and falkon" % (self.method))
raise NotImplementedError(
"%s not implemented. Choices: sklearn and falkon" % (self.method)
)

def save(self, folder: str = "."):
"""
Expand Down Expand Up @@ -330,12 +349,19 @@ def save(self, folder: str = "."):
# Save important material:
# - KRR weights
# - Samples used for prediction.
torch.save(torch.Tensor(self.anchors()), open("%s/sample_anchors.pt" % (folder), "wb"))
torch.save(torch.Tensor(self.sample_weights_), open("%s/sample_weights.pt" % (folder), "wb"))
torch.save(
torch.Tensor(self.anchors()), open("%s/sample_anchors.pt" % (folder), "wb")
)
torch.save(
torch.Tensor(self.sample_weights_),
open("%s/sample_weights.pt" % (folder), "wb"),
)

# Save weights and anchors as csv.
# Longer to load, but compatible with all platforms.
np.savetxt("%s/sample_weights.csv" % (folder), self.sample_weights_.detach().numpy())
np.savetxt(
"%s/sample_weights.csv" % (folder), self.sample_weights_.detach().numpy()
)
np.savetxt("%s/sample_anchors.csv" % (folder), self.anchors().detach().numpy())

return True
Expand All @@ -356,15 +382,21 @@ def load(folder: str = "."):
# Load and format parameters.
params = load(open("%s/params.pkl" % (folder), "rb"))
krr_params = {
e: f for e, f in params.items() if e in ["method", "M", "penalization", "mean_center", "unit_std"]
e: f
for e, f in params.items()
if e in ["method", "M", "penalization", "mean_center", "unit_std"]
}
# krr_params['kernel'] = krr_params['kernel'].kernel_name
krr_approx_clf = KRRApprox(**krr_params)
krr_approx_clf.kernel_ = params["kernel"]

# Load sample weights and anchors.
krr_approx_clf.sample_weights_ = torch.load(open("%s/sample_weights.pt" % (folder), "rb"))
krr_approx_clf.training_data_ = torch.load(open("%s/sample_anchors.pt" % (folder), "rb"))
krr_approx_clf.sample_weights_ = torch.load(
open("%s/sample_weights.pt" % (folder), "rb")
)
krr_approx_clf.training_data_ = torch.load(
open("%s/sample_anchors.pt" % (folder), "rb")
)

# Set up classifiers for out-of-sample application.
krr_approx_clf._setup_clf()
Expand Down
Loading

0 comments on commit 9e164e4

Please sign in to comment.