Encoding final code #77

Draft pull request. Wants to merge 60 commits into base: develop.

Commits (60)
aa788f0  Updates to plotting funcs (berkgercek, May 8, 2023)
0c807c6  subpanel plot script for only units used in paper (berkgercek, Jun 5, 2023)
6c37ad2  Compatibility for plots with pandas 2.0 (berkgercek, Jun 16, 2023)
051984e  Refactor subpanel_plots (berkgercek, Jun 16, 2023)
cf971d4  Changes to allow for direct plotting of fig panels (berkgercek, Jun 22, 2023)
44a07f4  Update utils.py (mschart, Jun 22, 2023)
e2a5747  add Berk's panels (mschart, Jun 22, 2023)
fe8a2ae  Rename bwm_figures.py to bwm_fig.py (mschart, Jun 22, 2023)
05be8a1  Include Berk's panels (mschart, Jun 23, 2023)
849bfa6  Update and rename bwm_fig.py to bwm_figs.py (mschart, Jun 23, 2023)
61cf501  works now for block, choice, stim, fback (mschart, Jun 26, 2023)
4153da8  Update bwm_figs.py (mschart, Jun 27, 2023)
37cec32  add some axis labels and table column names (mschart, Jul 3, 2023)
d6a96f2  add result pooling function (mschart, Jul 6, 2023)
342622a  harmonize file paths (mschart, Jul 6, 2023)
43eea02  save fit weights (berkgercek, Sep 22, 2023)
94e1a37  Update README.md (berkgercek, Oct 3, 2023)
6500aeb  save kernels after fitting (berkgercek, Oct 3, 2023)
76d116b  saving kernels and analyze using manifold methods (berkgercek, Oct 3, 2023)
7ae045f  Merge meta scripts into main (juhuntenburg, May 8, 2023)
f7c1f2f  Update bwm_figures.py (mschart, May 10, 2023)
4372ef4  add effect sizes to table (mschart, May 23, 2023)
62530cb  truncate values (mschart, May 24, 2023)
5772a54  mean and median for weight and age (GaelleChapuis, May 25, 2023)
73b2a30  remove pinned pandas version to match ibllib (oliche, Jun 1, 2023)
9cb5a24  qc adjustable in load_good_units (juhuntenburg, Jun 2, 2023)
4a16e57  small fix about qc value (juhuntenburg, Jun 3, 2023)
129b51b  add canonical info (mschart, Jun 5, 2023)
29d737b  Include wheel speed and velocity results (mschart, Jun 7, 2023)
bb4a8f4  fix Cosmos column (mschart, Jun 13, 2023)
34ef07a  fix Beryl Cosmos map for table (mschart, Jun 13, 2023)
98931db  add column for Cosmos names (mschart, Jun 19, 2023)
6b5b985  swap columns (mschart, Jun 19, 2023)
84b37a1  Add files via upload (mschart, Jun 19, 2023)
f6cb5f0  add prior paper columns (mschart, Jun 26, 2023)
f227e4f  again, less rounded (mschart, Jun 26, 2023)
061a4e1  correct typo (mschart, Jun 26, 2023)
a9dce24  Update region_info.csv (mschart, Jun 26, 2023)
bd27559  column name change (mschart, Jun 26, 2023)
c630b12  Atlas imports (k1o0, Aug 8, 2023)
5aa2b59  groom the repository (oliche, Aug 25, 2023)
f0b2a88  fix links ? (oliche, Aug 25, 2023)
d3dcb32  add a link to the Imbizo course (oliche, Aug 25, 2023)
a6edc87  fix ONE link (juhuntenburg, Dec 12, 2023)
d370747  new release (juhuntenburg, Dec 19, 2023)
f15bff6  Merge remote-tracking branch 'origin/main' into encoding-revisions (berkgercek, Jan 6, 2024)
5861b00  Edits to fix fns (berkgercek, Jan 29, 2024)
4b1b749  Simpler dockerfile (berkgercek, Jun 23, 2024)
5b86fa1  Merge branch 'main' into encoding-revisions (berkgercek, Jun 23, 2024)
b0d200f  Merge branch 'develop' into encoding-revisions (oliche, Jul 1, 2024)
2a9784e  Minor docker file changes (berkgercek, Jul 15, 2024)
ddf61dc  Merge branch 'encoding-revisions' of https://github.com/int-brain-lab… (berkgercek, Jul 15, 2024)
9396815  changes to load scripts (berkgercek, Jul 1, 2024)
7680718  Add option for early RT splits (berkgercek, Sep 9, 2024)
1283ba5  fixed bug in saving unit cluster IDs (berkgercek, Sep 9, 2024)
f4b220c  allow for early RT fitting (berkgercek, Sep 13, 2024)
6466122  Docker updates and early RT fitting (berkgercek, Sep 16, 2024)
7033d5c  bugfix (berkgercek, Sep 16, 2024)
fa92142  dockerfile fix (berkgercek, Sep 16, 2024)
9651085  Update docker spec and remove pytorch deps (berkgercek, Sep 16, 2024)
Files changed
brainwidemap/encoding/Dockerfile (25 changes: 9 additions & 16 deletions)

@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.7.1-devel-ubuntu22.04
+FROM ubuntu:latest
 # This can optionally be built with just ubuntu, rather than the nvidia cuda container.
 # If saving space is a concern, this is the way to go.
 LABEL description="Core container which has the basic necessities to run analyses in the\
@@ -15,24 +15,17 @@ COPY ./environment.yaml /data/environment.yaml
 SHELL ["/bin/bash", "-c"]
 # For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯
 RUN apt update && apt install -y wget git libgl1-mesa-dev
-RUN wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
-RUN bash Mambaforge.sh -b -p /opt/conda && rm Mambaforge.sh
+RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
+RUN bash Miniforge3.sh -b -p /opt/conda && rm Miniforge3.sh
+RUN wget -O iblreq.txt "https://raw.githubusercontent.com/int-brain-lab/ibllib/master/requirements.txt"
+RUN head -n -1 iblreq.txt > requirements.txt
+RUN rm iblreq.txt
 RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
     mamba install --yes conda-build &&\
     mamba env create -n iblenv --file=environment.yaml"
-RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\
-    conda activate iblenv &&\
-    mamba install --yes pytorch pytorch-cuda=11.7 -c pytorch -c nvidia &&\
-    conda clean --all -f -y"
-RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\
-    conda activate iblenv &&\
-    pip install globus-sdk iblutil ibl-neuropixel ONE-api phylib pynrrd slidingRP &&\
-    git clone https://github.com/int-brain-lab/iblapps.git &&\
-    conda develop ./iblapps &&\
-    git clone https://github.com/int-brain-lab/ibllib &&\
-    conda develop ./ibllib &&\
-    git clone https://github.com/berkgercek/neurencoding &&\
-    conda develop ./neurencoding"
+RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
+    conda activate iblenv && pip install -r requirements.txt && pip install ibllib --no-deps"
+RUN rm requirements.txt
 # The below allows interactively running the container with the correct environment, but be warned
 # that this will not work with commands passed to the container in a non-interactive shell.
 # In the case of e.g. `docker run thiscontainer python myscript.py`, the environment will not
brainwidemap/encoding/README.md (2 changes: 1 addition & 1 deletion)

@@ -38,7 +38,7 @@ The `scripts/` folder contains small scripts that either run plotting or simple
 
 ### Cluster worker
 
-`cluster_worker.py` implements a mother script for cluster workers to process individual probe insertions. This relies on a cached dataset, produced using the `pipelines/01_cache_regressors.py` script, as well as several files specifying the identity and parameters of a cached dataset and the parameters of the current run of the model.
+`cluster_worker.py` implements a mother script for cluster workers to process individual probe insertions. This relies on a cached dataset, produced using the `pipelines/01_cache_regressors.py` script, as well as several files specifying the identity and parameters of a cached dataset and the parameters of the current run of the model. Note that the params.py file will need to point to the appropriate cache locations as well for the worker to function.
 
 ### Design matrix
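For context on the note added above: the cache locations live in brainwidemap/encoding/params.py, which this PR also edits (see the params.py diff below). As a minimal sketch, a local copy of that file might look like the following; the directory values are placeholders, not repository defaults:

```python
# Sketch of brainwidemap/encoding/params.py for a local setup.
# Both paths are placeholders: point them at your own directories.
GLM_CACHE = "/path/to/glm_cache/"        # regressor cache written by pipelines/01_cache_regressors.py
GLM_FIT_PATH = "/path/to/results/glms/"  # fit outputs written by cluster_worker.py
```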
brainwidemap/encoding/cluster_worker.py (62 changes: 52 additions & 10 deletions)

@@ -14,6 +14,7 @@
 
 # Third party libraries
 import numpy as np
+from pandas import read_pickle
 
 # Brainwidemap repo imports
 from brainwidemap.encoding.design import generate_design
@@ -23,7 +24,7 @@
 
 def get_cached_regressors(fpath):
     with open(fpath, "rb") as fo:
-        d = pickle.load(fo)
+        d = read_pickle(fo)
     return d["trialsdf"], d["spk_times"], d["spk_clu"], d["clu_regions"], d["clu_df"]
 
 
@@ -37,9 +38,9 @@ def _create_sub_sess_path(parent, subject, session):
     return sesspath
 
 
-def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate):
+def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate, splitstr=""):
     sesspath = _create_sub_sess_path(GLM_FIT_PATH, subject, session_id)
-    fn = sesspath.joinpath(f"{fitdate}_{probes}_stepwise_regression.pkl")
+    fn = sesspath.joinpath(f"{fitdate}_{probes}{splitstr}_stepwise_regression.pkl")
     outdict = {
         "params": params,
         "probes": probes,
@@ -81,14 +82,41 @@ def fit_save_inputs(
     t_before,
     fitdate,
     null=None,
+    earlyrts=False,
+    laterts=False,
 ):
     stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn)
     sessprior = stdf["probabilityLeft"]
-    sessdesign = generate_design(stdf, sessprior, t_before, **params)
+    match (earlyrts, laterts):
+        case (False, False):
+            splitstr = ""
+        case (True, False):
+            splitstr = "_earlyrt"
+        case (False, True):
+            splitstr = "_latert"
+    if not earlyrts and not laterts:
+        sessdesign = generate_design(stdf, sessprior, t_before, **params)
+    else:
+        # Handle early and late RT flags, compute median for session if necessary
+        if "rt_thresh" not in params:
+            raise ValueError("Must specify rt_thresh if fitting early or late RTs")
+        if laterts and earlyrts:
+            raise ValueError(
+                "Cannot fit both early and late RTs. Disable both flags to fit all trials."
+            )
+        if params["rt_thresh"] == "session_median":
+            params["rt_thresh"] = np.median(stdf["firstMovement_times"] - stdf["trial_start"])
+
+        if earlyrts:
+            mask = (stdf["firstMovement_times"] - stdf["trial_start"]) < params["rt_thresh"]
+        elif laterts:
+            mask = (stdf["firstMovement_times"] - stdf["trial_start"]) >= params["rt_thresh"]
+        stdf = stdf[mask]
+        sessdesign = generate_design(stdf, sessprior, t_before, **params)
     if null is None:
         sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params)
         outputfn = save_stepwise(
-            subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate
+            subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate, splitstr
         )
     elif null == "pseudosession_pleft_iti":
         sessfit, nullfits = fit_stepwise_with_pseudoblocks(
@@ -114,11 +142,13 @@ def fit_save_inputs(
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Cluster GLM fitter. This script is called by"
-                                     "the batch script generated in "
-                                     "pipelines/02_fit_sessions.py and should in most "
-                                     "cases beyond debugging not be used in a "
-                                     "standalone fashion.")
+    parser = argparse.ArgumentParser(
+        description="Cluster GLM fitter. This script is called by"
+        "the batch script generated in "
+        "pipelines/02_fit_sessions.py and should in most "
+        "cases beyond debugging not be used in a "
+        "standalone fashion."
+    )
     parser.add_argument(
         "datafile",
         type=Path,
@@ -131,6 +161,16 @@ def fit_save_inputs(
     )
     parser.add_argument("fitdate", help="Date of fit for output file")
    parser.add_argument("--impostor_path", type=Path, help="Path to main impostor df file")
+    parser.add_argument(
+        "--earlyrt",
+        action="store_true",
+        help="Whether to fit separate movement kernels to early trials",
+    )
+    parser.add_argument(
+        "--latert",
+        action="store_true",
+        help="Whether to fit separate movement kernels to late trials",
+    )
     args = parser.parse_args()
 
     with open(args.datafile, "rb") as fo:
@@ -154,6 +194,8 @@ def fit_save_inputs(
         t_before,
         args.fitdate,
         null=params["null"],
+        earlyrts=args.earlyrt,
+        laterts=args.latert,
     )
     print("Fitting completed successfully!")
     print(outputfn)
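To make the new reaction-time split concrete: the block added to fit_save_inputs masks trials by the difference between firstMovement_times and trial_start, with rt_thresh optionally set to the session median. Below is a self-contained sketch of that masking logic on an invented trials table; the column names follow the diff, but the numbers are made up:

```python
import numpy as np
import pandas as pd

# Toy trials table with the two timing columns the diff uses.
stdf = pd.DataFrame({
    "trial_start": [0.0, 2.0, 4.0, 6.0],
    "firstMovement_times": [0.1, 2.5, 4.15, 6.8],
})

rt = stdf["firstMovement_times"] - stdf["trial_start"]  # reaction times: 0.1, 0.5, 0.15, 0.8
rt_thresh = np.median(rt)  # the "session_median" option in the diff, here 0.325

early = stdf[rt < rt_thresh]   # trials kept when fitting with --earlyrt
late = stdf[rt >= rt_thresh]   # trials kept when fitting with --latert
print(len(early), len(late))   # 2 2
```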
brainwidemap/encoding/design.py (7 changes: 3 additions & 4 deletions)

@@ -6,13 +6,12 @@
 # Standard library
 import logging
 
-# IBL libraries
-import neurencoding.design_matrix as dm
-
 # Third party libraries
 import numpy as np
 import pandas as pd
 from scipy.stats import norm
 
+# IBL libraries
+import neurencoding.design_matrix as dm
+
 _logger = logging.getLogger("brainwide")
brainwidemap/encoding/environment.yaml (21 changes: 2 additions & 19 deletions)

@@ -1,31 +1,14 @@
 name: iblenv
 dependencies:
-  - python=3.9
-  - apptools >= 4.5.0
-  - boto3
-  - click
-  - colorcet
-  - colorlog
-  - cython
-  - dataclasses
-  - flake8
-  - graphviz
-  - h5py
+  - python=3.10
   - ipython
   - matplotlib
-  - numba
-  - numpy
-  - pandas
-  - plotly
-  - pyarrow
-  - pyflakes >= 2.4.0
-  - pytest
-  - requests
   - scikit-learn
   - scipy >=1.4.1
   - seaborn
   - statsmodels
   - tqdm
   - pip
   - pip:
     - opencv-python
+    - pyqt<6
brainwidemap/encoding/fit.py (22 changes: 19 additions & 3 deletions)

@@ -13,12 +13,25 @@
 from brainwidemap.encoding.design import generate_design
 
 
-def fit(design, spk_t, spk_clu, binwidth, model, estimator, n_folds=5, contiguous=False, **kwargs):
+def fit(
+    design,
+    spk_t,
+    spk_clu,
+    binwidth,
+    model,
+    estimator,
+    n_folds=5,
+    contiguous=False,
+    mintrials=100,
+    **kwargs
+):
     """
     Function to fit a model using a cross-validated design matrix.
     """
     trials_idx = design.trialsdf.index
-    nglm = model(design, spk_t, spk_clu, binwidth=binwidth, estimator=estimator, mintrials=0)
+    nglm = model(
+        design, spk_t, spk_clu, binwidth=binwidth, estimator=estimator, mintrials=mintrials
+    )
     splitter = KFold(n_folds, shuffle=not contiguous)
     scores, weights, intercepts, alphas, splits = [], [], [], [], []
     for test, train in splitter.split(trials_idx):
@@ -52,6 +65,7 @@ def fit_stepwise(
     estimator,
     n_folds=5,
     contiguous=False,
+    mintrials=100,
     seqsel_kwargs={},
     seqselfit_kwargs={},
     **kwargs
@@ -107,7 +121,9 @@ def fit_stepwise(
     splits: list of dicts containing the test and train indices for each fold.
     """
     trials_idx = design.trialsdf.index
-    nglm = model(design, spk_t, spk_clu, binwidth=binwidth, estimator=estimator, mintrials=0)
+    nglm = model(
+        design, spk_t, spk_clu, binwidth=binwidth, estimator=estimator, mintrials=mintrials
+    )
     splitter = KFold(n_folds, shuffle=not contiguous)
     sequences, scores, deltas, splits = [], [], [], []
     for test, train in tqdm(splitter.split(trials_idx), desc="Fold", leave=False):
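The substantive change above is threading mintrials through to the model constructor instead of hard-coding 0; the surrounding fold logic is unchanged. For orientation, here is a minimal sketch of the KFold pattern both fitters use, including the repo's shuffle=not contiguous switch; the model fitting itself is elided:

```python
import numpy as np
from sklearn.model_selection import KFold

trials_idx = np.arange(100)  # stand-in for design.trialsdf.index
contiguous = False           # True would keep each fold a contiguous block of trials

splitter = KFold(n_splits=5, shuffle=not contiguous)
held_out = 0
for train, test in splitter.split(trials_idx):
    # fit on trials_idx[train], evaluate on trials_idx[test]
    held_out += len(test)
assert held_out == len(trials_idx)  # every trial is held out exactly once
```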
brainwidemap/encoding/glm_predict.py (60 changes: 43 additions & 17 deletions)

@@ -204,23 +204,7 @@ def psth_summary(self, align_time, unit, t_before=0.1, t_after=0.6, trials=None,
             ax=ax[0],
             smoothing=0.01,
         )
-        keytuple = (align_time, t_before, t_after, tuple(trials))
-        if keytuple not in self.full_psths:
-            self.full_psths[keytuple] = pred_psth(
-                self.nglm, align_time, t_before, t_after, trials=trials
-            )
-            self.cov_psths[keytuple] = {}
-            tmp = self.cov_psths[keytuple]
-            for cov in self.covar:
-                tmp[cov] = pred_psth(
-                    self.nglm,
-                    align_time,
-                    t_before,
-                    t_after,
-                    targ_regressors=[cov],
-                    trials=trials,
-                    incl_bias=False,
-                )
+        keytuple = self.compute_model_psth(align_time, t_before, t_after, trials)
         for cov in self.covar:
             ax[2].plot(self.combweights[cov].loc[unit])
             ax[2].set_title("Individual kernels (not PSTH contrib)")
@@ -244,3 +228,45 @@
         plt.suptitle(f"Unit {unit}")
         plt.tight_layout()
         return ax
+
+    def compute_model_psth(self, align_time, t_before, t_after, trials):
+        """
+        Generate and store internally model PSTHs for a given alignment time and trials.
+
+        Parameters
+        ----------
+        align_time : str
+            Column in the design matrix to align PSTH to
+        t_before : float
+            Time before the align time to compute PSTH for
+        t_after : float
+            Time after the align time to compute PSTH for
+        trials : array-like of int
+            List of trials on which to compute the PSTH
+
+        Returns
+        -------
+        tuple
+            4-tuple with the alignment time, time before, time after, and trials used to compute,
+            can be used as a key in the internal self.full_psths and self.cov_psths dictionaries,
+            which contain the full PSTH and the PSTH for each regressor, respectively.
+        """
+        keytuple = (align_time, t_before, t_after, tuple(trials))
+        if keytuple not in self.full_psths:
+            self.full_psths[keytuple] = pred_psth(
+                self.nglm, align_time, t_before, t_after, trials=trials
+            )
+            self.cov_psths[keytuple] = {}
+            tmp = self.cov_psths[keytuple]
+            for cov in self.covar:
+                tmp[cov] = pred_psth(
+                    self.nglm,
+                    align_time,
+                    t_before,
+                    t_after,
+                    targ_regressors=[cov],
+                    trials=trials,
+                    incl_bias=False,
+                )
+
+        return keytuple
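The new compute_model_psth factors the PSTH computation out of psth_summary and memoizes it. The underlying pattern is dictionary-keyed caching on the call arguments; a self-contained sketch follows, where the class and method names are invented and a list stands in for the real pred_psth output:

```python
class PsthCache:
    """Toy illustration of the keytuple-based memoization in compute_model_psth."""

    def __init__(self):
        self.full_psths = {}

    def compute(self, align_time, t_before, t_after, trials):
        # tuple(trials) makes the trial list hashable, so it can be part of a dict key
        keytuple = (align_time, t_before, t_after, tuple(trials))
        if keytuple not in self.full_psths:
            self.full_psths[keytuple] = self._expensive_psth(trials)
        return keytuple

    def _expensive_psth(self, trials):
        return [0.0] * len(trials)  # placeholder for pred_psth()


cache = PsthCache()
k1 = cache.compute("stimOn_times", 0.1, 0.6, [1, 2, 3])
k2 = cache.compute("stimOn_times", 0.1, 0.6, [1, 2, 3])
assert k1 == k2 and len(cache.full_psths) == 1  # second call reused the stored PSTH
```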
brainwidemap/encoding/params.py (4 changes: 2 additions & 2 deletions)

@@ -4,5 +4,5 @@
 work.
 """
 
-GLM_CACHE = "/mnt/Storage/glm_cache/"
-GLM_FIT_PATH = "/mnt/Storage/results/glms/"
+GLM_CACHE = "/home/gercek/Projects/glm_cache/"
+GLM_FIT_PATH = "/home/gercek/Projects/results/glms/"
brainwidemap/encoding/pipelines/01_cache_regressors.py (15 changes: 7 additions & 8 deletions)

@@ -7,14 +7,11 @@
 
 # Third party libraries
 import dask
-import numpy as np
-import pandas as pd
 from dask.distributed import Client
 from dask_jobqueue import SLURMCluster
 
 # IBL libraries
 import brainbox.io.one as bbone
-from iblutil.numerical import ismember
 from one.api import ONE
 from brainwidemap.encoding.params import GLM_CACHE
 from brainwidemap.encoding.utils import load_regressors
@@ -68,7 +65,7 @@ def delayed_loadsave(subject, session_id, pid, params):
 T_BEF = 0.6  # Time before stimulus onset to include in the definition of the trial
 T_AFT = 0.6  # Time after feedback to include in the definition of a trial
 BINWIDTH = 0.02  # Size of binwidth for wheel velocity traces, in seconds
-ABSWHEEL = False  # Whether to return wheel velocity (False) or speed (True)
+ABSWHEEL = True  # Whether to return wheel velocity (False) or speed (True)
 CLU_CRITERIA = "bwm"  # Criteria on cluster inclusion in cache
 # End parameters
 
@@ -79,13 +76,15 @@ def delayed_loadsave(subject, session_id, pid, params):
     "binwidth": BINWIDTH,
     "abswheel": ABSWHEEL,
     "clu_criteria": CLU_CRITERIA,
+    "one_url": "https://alyx.internationalbrainlab.org",
+    "one_pw": "international",
 }
 
-pw = 'international'
-one = ONE(base_url='https://openalyx.internationalbrainlab.org', password=pw, silent=True)
+one = ONE(base_url=params["one_url"], silent=True)
 dataset_futures = []
 
-sessdf = bwm_query().set_index("pid")
+freeze = "2023_12_bwm_release" if CLU_CRITERIA == "bwm" else None
+sessdf = bwm_query(freeze=freeze).set_index("pid")
 
 for pid, rec in sessdf.iterrows():
     subject = rec.subject
@@ -110,7 +109,7 @@ def delayed_loadsave(subject, session_id, pid, params):
         f"export OPENBLAS_NUM_THREADS={N_CORES}",
     ],
 )
-cluster.scale(40)
+cluster.scale(20)
 client = Client(cluster)
 
 tmp_futures = [client.compute(future[3]) for future in dataset_futures]
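For readers unfamiliar with the pipeline's scheduling: the script builds dask delayed tasks, submits them through a SLURM-backed cluster, and collects results via a Client, with cluster.scale(20) now requesting 20 workers rather than 40. A hedged sketch of that pattern follows; the resource values are illustrative rather than the script's actual settings, and it only runs on a host with SLURM available:

```python
import dask
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

@dask.delayed
def load_and_save(pid):
    return f"cached {pid}"  # placeholder for the real per-probe regressor caching

futures = [load_and_save(pid) for pid in ["probe00", "probe01"]]

cluster = SLURMCluster(cores=1, memory="12GB", walltime="01:00:00")  # illustrative resources
cluster.scale(20)  # ask SLURM for 20 worker jobs, as in the updated script
client = Client(cluster)
results = client.gather(client.compute(futures))  # block until all probes are cached
print(results)
```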