
Commit 4c4fc97
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Aug 19, 2024
1 parent 4915d27 commit 4c4fc97
Showing 16 changed files with 769 additions and 263 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/test.yaml.rej
@@ -1,7 +1,7 @@
diff a/.github/workflows/test.yaml b/.github/workflows/test.yaml (rejected hunks)
@@ -1,53 +1,67 @@
name: Test

on:
- push:
- branches: [main]
@@ -13,13 +13,13 @@ diff a/.github/workflows/test.yaml b/.github/workflows/test.yaml (rejected hunks)
+ branches: [main]
+ schedule:
+ - cron: "0 5 1,15 * *"

concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
- cancel-in-progress: true
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true

jobs:
- test:
- runs-on: ${{ matrix.os }}
@@ -37,7 +37,7 @@ diff a/.github/workflows/test.yaml b/.github/workflows/test.yaml (rejected hunks)
+ defaults:
+ run:
+ shell: bash -e {0} # -e to fail on error

+ strategy:
+ fail-fast: false
+ matrix:
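The cron entry added in this hunk fires at 05:00 UTC on the 1st and 15th of each month. As a quick sanity check of such an expression — a sketch only, using the third-party croniter package, which this workflow does not itself depend on:

from datetime import datetime

from croniter import croniter

# Next firings of the schedule added above, starting from the commit date
schedule = croniter("0 5 1,15 * *", datetime(2024, 8, 19))
print(schedule.get_next(datetime))  # 2024-09-01 05:00:00
print(schedule.get_next(datetime))  # 2024-09-15 05:00:00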
6 changes: 3 additions & 3 deletions README.md.rej
@@ -1,10 +1,10 @@
diff a/README.md b/README.md (rejected hunks)
@@ -17,7 +17,7 @@ Please refer to the [documentation][link-docs]. In particular, the

## Installation

-You need to have Python 3.8 or newer installed on your system. If you don't have
+You need to have Python 3.10 or newer installed on your system. If you don't have
Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge).

There are several alternative options to install sobolev_alignment:
10 changes: 5 additions & 5 deletions docs/conf.py.rej
@@ -7,7 +7,7 @@ diff a/docs/conf.py b/docs/conf.py (rejected hunks)
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
@@ -36,10 +36,10 @@ needs_sphinx = "4.0"

html_context = {
"display_github": True, # Integrate GitHub
- "github_user": "saroudant", # Username
@@ -19,7 +19,7 @@ diff a/docs/conf.py b/docs/conf.py (rejected hunks)
+ "github_version": "main",
+ "conf_py_path": "/docs/",
}

# -- General configuration ---------------------------------------------------
@@ -57,6 +57,7 @@ extensions = [
"sphinx_autodoc_typehints",
@@ -28,22 +28,22 @@ diff a/docs/conf.py b/docs/conf.py (rejected hunks)
+ "sphinxext.opengraph",
*[p.stem for p in (HERE / "extensions").glob("*.py")],
]

@@ -108,12 +109,15 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
#
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]
+html_css_files = ["css/custom.css"]
+
html_title = project_name

html_theme_options = {
"repository_url": repository_url,
"use_repository_button": True,
"path_to_docs": "docs/",
+ "navigation_with_keys": False,
}

pygments_style = "default"
@@ -123,18 +127,3 @@ nitpick_ignore = [
# you can add an exception to this list.
4 changes: 2 additions & 2 deletions pyproject.toml.rej
@@ -15,7 +15,7 @@ diff a/pyproject.toml b/pyproject.toml (rejected hunks)
- "session-info"
+ "session-info",
]

[project.optional-dependencies]
dev = [
- # CLI for bumping the version number
@@ -44,5 +44,5 @@ diff a/pyproject.toml b/pyproject.toml (rejected hunks)
- "pytest-cov",
+ "coverage",
]

[tool.coverage.run]
33 changes: 26 additions & 7 deletions sobolev_alignment/feature_analysis.py
@@ -79,7 +79,9 @@ def higher_order_contribution(

# Compute features by iterating over possible combinations
logging.info("\t START FEATURES")
- combinations_features = Parallel(n_jobs=n_jobs, verbose=1, max_nbytes=1e6, pre_dispatch=int(1.5 * n_jobs))(
+ combinations_features = Parallel(
+ n_jobs=n_jobs, verbose=1, max_nbytes=1e6, pre_dispatch=int(1.5 * n_jobs)
+ )(
delayed(combinatorial_product)(sparse_data, x, gamma)
for x in combinations_with_replacement(np.arange(sparse_data.shape[1]), r=d)
)
@@ -98,10 +100,18 @@ def higher_order_contribution(
# Return names of each features.
logging.info("\t\t FIND NAMES")
combinations_names = Parallel(
- n_jobs=min(5, n_jobs), verbose=1, max_nbytes=1e4, pre_dispatch=int(1.5 * min(5, n_jobs))
- )(delayed(_interaction_name)(x) for x in combinations_with_replacement(gene_names, r=d))
+ n_jobs=min(5, n_jobs),
+ verbose=1,
+ max_nbytes=1e4,
+ pre_dispatch=int(1.5 * min(5, n_jobs)),
+ )(
+ delayed(_interaction_name)(x)
+ for x in combinations_with_replacement(gene_names, r=d)
+ )

- return pd.DataFrame.sparse.from_spmatrix(data=combinations_features, columns=combinations_names)
+ return pd.DataFrame.sparse.from_spmatrix(
+ data=combinations_features, columns=combinations_names
+ )


def _combination_to_idx(idx, p):
@@ -177,20 +187,29 @@ def combinatorial_product(x, idx, gamma):
Values of the higher order feature.
"""
# Iterate over all genes and compute the feature weight by multiplication
- prod = [basis(x[:, i], k, gamma) for i, k in enumerate(_combination_to_idx(idx, x.shape[1])) if k > 0]
+ prod = [
+ basis(x[:, i], k, gamma)
+ for i, k in enumerate(_combination_to_idx(idx, x.shape[1]))
+ if k > 0
+ ]
if len(prod) == 0:
return 1

return reduce(scipy.sparse.csc_matrix.multiply, prod)


def _interaction_name(gene_combi):
- combin_name = [f"{g}^{r}" for g, r in zip(*np.unique(gene_combi, return_counts=True))]
+ combin_name = [
+ f"{g}^{r}" for g, r in zip(*np.unique(gene_combi, return_counts=True))
+ ]
return "*".join(combin_name) if len(combin_name) > 0 else "1"


def _higher_order_interaction_wrapper(data, x, gamma, gene_names):
- return [combinatorial_product(data, x, gamma), _interaction_name(gene_names, _combination_to_idx(x, data.shape[1]))]
+ return [
+ combinatorial_product(data, x, gamma),
+ _interaction_name(gene_names, _combination_to_idx(x, data.shape[1])),
+ ]


def _compute_offset(data, gamma):
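For intuition: higher_order_contribution enumerates degree-d monomials of gene features, and _interaction_name turns each combination into a readable label. A minimal, self-contained sketch of that naming step, reusing the helper exactly as reformatted above (the gene names are hypothetical):

import numpy as np
from itertools import combinations_with_replacement

def _interaction_name(gene_combi):
    # Count repeats of each gene and render them as powers
    combin_name = [f"{g}^{r}" for g, r in zip(*np.unique(gene_combi, return_counts=True))]
    return "*".join(combin_name) if len(combin_name) > 0 else "1"

genes = ["KRAS", "TP53"]
for combi in combinations_with_replacement(genes, r=2):
    print(_interaction_name(combi))
# KRAS^2
# KRAS^1*TP53^1
# TP53^2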
20 changes: 16 additions & 4 deletions sobolev_alignment/generate_artificial_sample.py
@@ -66,7 +66,9 @@ def generate_samples(
batch_name_ids = [batch_key_dict[n] for n in batch_names]
batch_name_ids = torch.Tensor(np.array(batch_name_ids).reshape(-1, 1))
# Recover log library size (exponential)
- lib_size_samples = np.array([np.random.choice(lib_size[n], 1)[0] for n in batch_names])
+ lib_size_samples = np.array(
+ [np.random.choice(lib_size[n], 1)[0] for n in batch_names]
+ )
lib_size_samples = np.log(lib_size_samples)
else:
batch_name_ids = None
@@ -82,7 +84,11 @@
cont_covs = torch.Tensor(covariates_values)

# Generate random noise
- z = torch.Tensor(np.random.normal(size=(int(sample_size), model.init_params_["non_kwargs"]["n_latent"])))
+ z = torch.Tensor(
+ np.random.normal(
+ size=(int(sample_size), model.init_params_["non_kwargs"]["n_latent"])
+ )
+ )
dist_param_samples = model.module.generative(
z=z,
library=torch.Tensor(np.array(lib_size_samples).reshape(-1, 1)),
@@ -156,8 +162,14 @@ def parallel_generate_samples(
results = Parallel(n_jobs=n_jobs, verbose=1)(
delayed(generate_samples)(
sample_size=batch_size,
- batch_names=batch_names[i : i + batch_size] if batch_names is not None else None,
- covariates_values=covariates_values[i : i + batch_size] if covariates_values is not None else None,
+ batch_names=(
+ batch_names[i : i + batch_size] if batch_names is not None else None
+ ),
+ covariates_values=(
+ covariates_values[i : i + batch_size]
+ if covariates_values is not None
+ else None
+ ),
lib_size=lib_size,
model=model,
batch_key_dict=batch_key_dict,
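parallel_generate_samples dispatches fixed-size slices of the batch annotations to generate_samples via joblib; the parenthesized conditionals introduced above only guard against batch_names or covariates_values being None. A self-contained sketch of the same slicing pattern, with toy data in place of the scVI model used here:

from joblib import Parallel, delayed

def generate(batch_names):
    # Stand-in for generate_samples: just report the slice size
    return len(batch_names) if batch_names is not None else 0

batch_names = ["A"] * 7 + ["B"] * 3
batch_size = 4
results = Parallel(n_jobs=2)(
    delayed(generate)(batch_names[i : i + batch_size] if batch_names is not None else None)
    for i in range(0, len(batch_names), batch_size)
)
print(results)  # [4, 4, 2]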
22 changes: 17 additions & 5 deletions sobolev_alignment/interpolated_features.py
@@ -9,7 +9,9 @@
import scipy


- def compute_optimal_tau(PV_number, pv_projections, principal_angles, n_interpolation=100):
+ def compute_optimal_tau(
+ PV_number, pv_projections, principal_angles, n_interpolation=100
+ ):
"""Compute the optimal interpolation step for each PV (Grassmann interpolation)."""
ks_statistics = {}
for tau_step in np.linspace(0, 1, n_interpolation + 1):
@@ -25,12 +27,22 @@ def compute_optimal_tau(PV_number, pv_projections, principal_angles, n_interpolation=100):

def project_on_interpolate_PV(angle, PV_number, tau_step, pv_projections):
"""Project data on interpolated PVs."""
- source_proj = np.sin((1 - tau_step) * angle) * pv_projections["source"]["source"][:, PV_number]
- source_proj += np.sin(tau_step * angle) * pv_projections["target"]["source"][:, PV_number]
+ source_proj = (
+ np.sin((1 - tau_step) * angle)
+ * pv_projections["source"]["source"][:, PV_number]
+ )
+ source_proj += (
+ np.sin(tau_step * angle) * pv_projections["target"]["source"][:, PV_number]
+ )
source_proj /= np.sin(angle)

- target_proj = np.sin((1 - tau_step) * angle) * pv_projections["source"]["target"][:, PV_number]
- target_proj += np.sin(tau_step * angle) * pv_projections["target"]["target"][:, PV_number]
+ target_proj = (
+ np.sin((1 - tau_step) * angle)
+ * pv_projections["source"]["target"][:, PV_number]
+ )
+ target_proj += (
+ np.sin(tau_step * angle) * pv_projections["target"]["target"][:, PV_number]
+ )
target_proj /= np.sin(angle)

return source_proj, target_proj
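project_on_interpolate_PV applies the standard spherical (Grassmann geodesic) interpolation identity: for principal angle theta and step tau, the interpolated direction is (sin((1 - tau) * theta) * u + sin(tau * theta) * v) / sin(theta), which recovers u at tau = 0 and v at tau = 1. A numeric sketch with hypothetical 2-D vectors, not data from the package:

import numpy as np

theta, tau = 0.8, 0.25
u = np.array([1.0, 0.0])
v = np.array([np.cos(theta), np.sin(theta)])  # subtends angle theta with u
interp = (np.sin((1 - tau) * theta) * u + np.sin(tau * theta) * v) / np.sin(theta)
print(np.linalg.norm(interp))   # ~1.0: the interpolation stays on the unit sphere
print(np.arccos(interp @ u))    # ~0.2, i.e. tau * theta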
60 changes: 46 additions & 14 deletions sobolev_alignment/krr_approx.py
@@ -34,7 +34,10 @@
FALKON_IMPORTED = True
except ImportError:
FALKON_IMPORTED = False
print("FALKON NOT INSTALLED, OR NOT IMPORTED. USING FALKON WOULD RESULT IN BETTER PERFORMANCE.", flush=True)
print(
"FALKON NOT INSTALLED, OR NOT IMPORTED. USING FALKON WOULD RESULT IN BETTER PERFORMANCE.",
flush=True,
)
from sklearn.gaussian_process.kernels import Matern, PairwiseKernel
from sklearn.kernel_ridge import KernelRidge

@@ -133,7 +136,11 @@ def __init__(

# Set kernel
self.kernel = kernel
- self.kernel_params = kernel_params if kernel_params else self.default_kernel_params[self.method][self.kernel]
+ self.kernel_params = (
+ kernel_params
+ if kernel_params
+ else self.default_kernel_params[self.method][self.kernel]
+ )
self._make_kernel()

# Set penalization parameters
@@ -147,7 +154,9 @@ def __init__(
# Preprocessing
self.mean_center = mean_center
self.unit_std = unit_std
- self.pre_process_ = StandardScaler(with_mean=mean_center, with_std=unit_std, copy=False)
+ self.pre_process_ = StandardScaler(
+ with_mean=mean_center, with_std=unit_std, copy=False
+ )

def _make_kernel(self):
"""
@@ -160,17 +169,23 @@ def _make_kernel(self):
# scikit-learn initialization
if self.method.lower() == "sklearn":
if self.sklearn_kernel[self.kernel.lower()] != "wrapper":
- self.kernel_ = self.sklearn_kernel[self.kernel.lower()](**self.kernel_params)
+ self.kernel_ = self.sklearn_kernel[self.kernel.lower()](
+ **self.kernel_params
+ )
else:
- self.kernel_ = PairwiseKernel(metric=self.kernel.lower(), **self.kernel_params)
+ self.kernel_ = PairwiseKernel(
+ metric=self.kernel.lower(), **self.kernel_params
+ )

# Falkon
elif self.method.lower() == "falkon":
self.kernel_ = self.falkon_kernel[self.kernel.lower()](**self.kernel_params)

# If not implemented
else:
raise NotImplementedError("%s not implemented. Choices: sklearn and falkon" % (self.method))
raise NotImplementedError(
"%s not implemented. Choices: sklearn and falkon" % (self.method)
)

return True

@@ -197,7 +212,9 @@ def fit(self, X: torch.Tensor, y: torch.Tensor):
# are False as it can have a large memory footprint.
if self.mean_center or self.unit_std:
self.pre_process_.fit(X)
- self.training_data_ = torch.Tensor(self.pre_process_.transform(torch.Tensor(X)))
+ self.training_data_ = torch.Tensor(
+ self.pre_process_.transform(torch.Tensor(X))
+ )
else:
self.training_data_ = X

@@ -296,7 +313,9 @@ def transform(self, X: torch.Tensor):
elif self.method == "falkon":
return self.ridge_clf_.predict(X)
else:
raise NotImplementedError("%s not implemented. Choices: sklearn and falkon" % (self.method))
raise NotImplementedError(
"%s not implemented. Choices: sklearn and falkon" % (self.method)
)

def save(self, folder: str = "."):
"""
@@ -330,12 +349,19 @@ def save(self, folder: str = "."):
# Save important material:
# - KRR weights
# - Samples used for prediction.
- torch.save(torch.Tensor(self.anchors()), open("%s/sample_anchors.pt" % (folder), "wb"))
- torch.save(torch.Tensor(self.sample_weights_), open("%s/sample_weights.pt" % (folder), "wb"))
+ torch.save(
+ torch.Tensor(self.anchors()), open("%s/sample_anchors.pt" % (folder), "wb")
+ )
+ torch.save(
+ torch.Tensor(self.sample_weights_),
+ open("%s/sample_weights.pt" % (folder), "wb"),
+ )

# Save weights and anchors as csv.
# Longer to load, but compatible with all platforms.
np.savetxt("%s/sample_weights.csv" % (folder), self.sample_weights_.detach().numpy())
np.savetxt(
"%s/sample_weights.csv" % (folder), self.sample_weights_.detach().numpy()
)
np.savetxt("%s/sample_anchors.csv" % (folder), self.anchors().detach().numpy())

return True
@@ -356,15 +382,21 @@ def load(folder: str = "."):
# Load and format parameters.
params = load(open("%s/params.pkl" % (folder), "rb"))
krr_params = {
- e: f for e, f in params.items() if e in ["method", "M", "penalization", "mean_center", "unit_std"]
+ e: f
+ for e, f in params.items()
+ if e in ["method", "M", "penalization", "mean_center", "unit_std"]
}
# krr_params['kernel'] = krr_params['kernel'].kernel_name
krr_approx_clf = KRRApprox(**krr_params)
krr_approx_clf.kernel_ = params["kernel"]

# Load sample weights and anchors.
- krr_approx_clf.sample_weights_ = torch.load(open("%s/sample_weights.pt" % (folder), "rb"))
- krr_approx_clf.training_data_ = torch.load(open("%s/sample_anchors.pt" % (folder), "rb"))
+ krr_approx_clf.sample_weights_ = torch.load(
+ open("%s/sample_weights.pt" % (folder), "rb")
+ )
+ krr_approx_clf.training_data_ = torch.load(
+ open("%s/sample_anchors.pt" % (folder), "rb")
+ )

# Set up classifiers for out-of-sample application.
krr_approx_clf._setup_clf()
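Taken together, fit/transform/save/load give KRRApprox a scikit-learn-style round trip: save writes params.pkl plus the anchors and weights (as .pt and .csv), and load rebuilds the classifier from them. A hypothetical usage sketch — method="sklearn", the folder layout, and the method names come from the diff above, but kernel="rbf" and the tensor shapes are assumptions, not a documented API:

import torch

from sobolev_alignment.krr_approx import KRRApprox

X, y = torch.randn(200, 50), torch.randn(200, 10)
clf = KRRApprox(method="sklearn", kernel="rbf")  # "rbf" assumed to be a valid kernel key
clf.fit(X, y)
clf.save(folder="/tmp/krr")  # writes params.pkl, sample_anchors.pt, sample_weights.pt, ...
restored = KRRApprox.load(folder="/tmp/krr")
preds = restored.transform(X)  # out-of-sample prediction with the reloaded model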

