From a4daa121e180e92c80d82d3a5535a54c1d75515e Mon Sep 17 00:00:00 2001 From: Igor Carrara <94047258+carraraig@users.noreply.github.com> Date: Tue, 30 May 2023 16:17:15 +0200 Subject: [PATCH 01/64] Restore subject 32 46 and 49 of Cho2017 dataset (#392) * Resore subject 32 46 and 49 of Cho2017 dataset * Resore subject 32 46 and 49 of Cho2017 dataset --- docs/source/whats_new.rst | 2 +- moabb/datasets/gigadb.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index eca7d884c..05bde11a5 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -23,7 +23,7 @@ Enhancements Bugs ~~~~ -- None +- Restore 3 subject from Cho2017 (:gh:`392` by `Igor Carrara`_ and `Sylvain Chevallier`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/gigadb.py b/moabb/datasets/gigadb.py index 590577741..cd1002f53 100644 --- a/moabb/datasets/gigadb.py +++ b/moabb/datasets/gigadb.py @@ -77,9 +77,6 @@ def __init__(self): doi="10.5524/100295", ) - for ii in [32, 46, 49]: - self.subject_list.remove(ii) - def _get_single_subject_data(self, subject): """return data for a single subject""" fname = self.data_path(subject) From 79f342bdf0089cbaf1ab38da7fa387cfe0d29930 Mon Sep 17 00:00:00 2001 From: Bru Date: Wed, 31 May 2023 16:01:09 +0200 Subject: [PATCH 02/64] Parallel evaluation (Cross-Session, Within-Session) (#364) * Changing the cross-session to include parallel * Fixing pass with return * Changing the evaluations.py * Changing the Within evaluations.py * Reverting * Reverting again * Parallel WithinSession * Updating the evaluation, removing the yield * Updating the evaluation, removing the yield * Changing the parameter to base evaluation * Adding verbose as true * Fixing the issue =) * Updating the whats_new.rst file --------- Co-authored-by: Sylvain Chevallier --- docs/source/whats_new.rst | 2 +- moabb/evaluations/base.py | 6 +- moabb/evaluations/evaluations.py | 354 +++++++++++++++++-------------- 3 files changed, 200 insertions(+), 162 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 05bde11a5..e45ba4581 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -18,7 +18,7 @@ Develop branch Enhancements ~~~~~~~~~~~~ -- None +- Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) Bugs ~~~~ diff --git a/moabb/evaluations/base.py b/moabb/evaluations/base.py index d548503b6..38ea2bcd6 100644 --- a/moabb/evaluations/base.py +++ b/moabb/evaluations/base.py @@ -27,6 +27,9 @@ class BaseEvaluation(ABC): If not None, can guarantee same seed for shuffling examples. n_jobs: int, default=1 Number of jobs for fitting of pipeline. + n_jobs_evaluation: int, default=1 + Number of jobs for evaluation, processing in parallel the within session, + cross-session or cross-subject. overwrite: bool, default=False If true, overwrite the results. 
error_score: "raise" or numeric, default="raise" @@ -52,6 +55,7 @@ def __init__( datasets=None, random_state=None, n_jobs=1, + n_jobs_evaluation=1, overwrite=False, error_score="raise", suffix="", @@ -63,12 +67,12 @@ def __init__( ): self.random_state = random_state self.n_jobs = n_jobs + self.n_jobs_evaluation = n_jobs_evaluation self.error_score = error_score self.hdf5_path = hdf5_path self.return_epochs = return_epochs self.return_raws = return_raws self.mne_labels = mne_labels - # check paradigm if not isinstance(paradigm, BaseParadigm): raise (ValueError("paradigm must be an Paradigm instance")) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index e8ee23a6c..4f7230fa7 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -6,6 +6,7 @@ import joblib import numpy as np +from joblib import Parallel, delayed from mne.epochs import BaseEpochs from sklearn.base import clone from sklearn.metrics import get_scorer @@ -71,6 +72,9 @@ class WithinSessionEvaluation(BaseEvaluation): If not None, can guarantee same seed for shuffling examples. n_jobs: int, default=1 Number of jobs for fitting of pipeline. + n_jobs_evaluation: int, default=1 + Number of jobs for evaluation, processing in parallel the within session, + cross-session or cross-subject. overwrite: bool, default=False If true, overwrite the results. error_score: "raise" or numeric, default="raise" @@ -172,94 +176,106 @@ def _grid_search(self, param_grid, name_grid, name, grid_clf, X_, y_, cv): return grid_clf # flake8: noqa: C901 + def _evaluate(self, dataset, pipelines, param_grid): + results = Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( + delayed(self._evaluate_subject)(dataset, pipelines, param_grid, subject) + for subject in tqdm( + dataset.subject_list, desc=f"{dataset.code}-WithinSession" + ) + ) + + # Concatenate the results from all subjects + yield from [res for subject_results in results for res in subject_results] + + def _evaluate_subject(self, dataset, pipelines, param_grid, subject): # Progress Bar at subject level - for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-WithinSession"): - # check if we already have result for this subject/pipeline - # we might need a better granularity, if we query the DB - run_pipes = self.results.not_yet_computed(pipelines, dataset, subject) - if len(run_pipes) == 0: - continue + # check if we already have result for this subject/pipeline + # we might need a better granularity, if we query the DB + run_pipes = self.results.not_yet_computed(pipelines, dataset, subject) + if len(run_pipes) == 0: + return [] - # get the data - X, y, metadata = self.paradigm.get_data( - dataset, [subject], self.return_epochs, self.return_raws - ) + # get the data + X, y, metadata = self.paradigm.get_data( + dataset, [subject], self.return_epochs, self.return_raws + ) + subject_results = [] + # iterate over sessions + for session in np.unique(metadata.session): + ix = metadata.session == session - # iterate over sessions - for session in np.unique(metadata.session): - ix = metadata.session == session + for name, clf in run_pipes.items(): + if _carbonfootprint: + # Initialize CodeCarbon + tracker = EmissionsTracker(save_to_file=False, log_level="error") + tracker.start() + t_start = time() + cv = StratifiedKFold(5, shuffle=True, random_state=self.random_state) + scorer = get_scorer(self.paradigm.scoring) + le = LabelEncoder() + y_cv = le.fit_transform(y[ix]) + X_ = X[ix] + y_ = y[ix] if self.mne_labels else y_cv - for name, clf 
in run_pipes.items(): - if _carbonfootprint: - # Initialize CodeCarbon - tracker = EmissionsTracker(save_to_file=False, log_level="error") - tracker.start() - t_start = time() - cv = StratifiedKFold(5, shuffle=True, random_state=self.random_state) - scorer = get_scorer(self.paradigm.scoring) - le = LabelEncoder() - y_cv = le.fit_transform(y[ix]) - X_ = X[ix] - y_ = y[ix] if self.mne_labels else y_cv + grid_clf = clone(clf) - grid_clf = clone(clf) + name_grid = os.path.join( + str(self.hdf5_path), + "GridSearch_WithinSession", + dataset.code, + "subject" + str(subject), + str(session), + str(name), + ) - name_grid = os.path.join( - str(self.hdf5_path), - "GridSearch_WithinSession", - dataset.code, - "subject" + str(subject), - str(session), - str(name), - ) + # Implement Grid Search + grid_clf = self._grid_search( + param_grid, name_grid, name, grid_clf, X_, y_, cv + ) - # Implement Grid Search - grid_clf = self._grid_search( - param_grid, name_grid, name, grid_clf, X_, y_, cv + if isinstance(X, BaseEpochs): + scorer = get_scorer(self.paradigm.scoring) + acc = list() + X_ = X[ix] + y_ = y[ix] if self.mne_labels else y_cv + for train, test in cv.split(X_, y_): + cvclf = clone(grid_clf) + cvclf.fit(X_[train], y_[train]) + acc.append(scorer(cvclf, X_[test], y_[test])) + acc = np.array(acc) + else: + acc = cross_val_score( + grid_clf, + X[ix], + y_cv, + cv=cv, + scoring=self.paradigm.scoring, + n_jobs=self.n_jobs, + error_score=self.error_score, ) + score = acc.mean() + if _carbonfootprint: + emissions = tracker.stop() + if emissions is None: + emissions = np.NaN + duration = time() - t_start + nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] + res = { + "time": duration / 5.0, # 5 fold CV + "dataset": dataset, + "subject": subject, + "session": session, + "score": score, + "n_samples": len(y_cv), # not training sample + "n_channels": nchan, + "pipeline": name, + } + if _carbonfootprint: + res["carbon_emission"] = (1000 * emissions,) + subject_results.append(res) - if isinstance(X, BaseEpochs): - scorer = get_scorer(self.paradigm.scoring) - acc = list() - X_ = X[ix] - y_ = y[ix] if self.mne_labels else y_cv - for train, test in cv.split(X_, y_): - cvclf = clone(grid_clf) - cvclf.fit(X_[train], y_[train]) - acc.append(scorer(cvclf, X_[test], y_[test])) - acc = np.array(acc) - else: - acc = cross_val_score( - grid_clf, - X[ix], - y_cv, - cv=cv, - scoring=self.paradigm.scoring, - n_jobs=self.n_jobs, - error_score=self.error_score, - ) - score = acc.mean() - if _carbonfootprint: - emissions = tracker.stop() - if emissions is None: - emissions = np.NaN - duration = time() - t_start - nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] - res = { - "time": duration / 5.0, # 5 fold CV - "dataset": dataset, - "subject": subject, - "session": session, - "score": score, - "n_samples": len(y_cv), # not training sample - "n_channels": nchan, - "pipeline": name, - } - if _carbonfootprint: - res["carbon_emission"] = (1000 * emissions,) - - yield res + return subject_results def get_data_size_subsets(self, y): if self.data_size is None: @@ -421,6 +437,9 @@ class CrossSessionEvaluation(BaseEvaluation): If not None, can guarantee same seed for shuffling examples. n_jobs: int, default=1 Number of jobs for fitting of pipeline. + n_jobs_evaluation: int, default=1 + Number of jobs for evaluation, processing in parallel the within session, + cross-session or cross-subject. overwrite: bool, default=False If true, overwrite the results. 
error_score: "raise" or numeric, default="raise" @@ -480,98 +499,110 @@ def evaluate(self, dataset, pipelines, param_grid): if not self.is_valid(dataset): raise AssertionError("Dataset is not appropriate for evaluation") # Progressbar at subject level - for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-CrossSession"): - # check if we already have result for this subject/pipeline - # we might need a better granularity, if we query the DB - run_pipes = self.results.not_yet_computed(pipelines, dataset, subject) - if len(run_pipes) == 0: - continue + results = [] + for result in Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( + delayed(self.process_subject)(subject, param_grid, pipelines, dataset) + for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-CrossSession") + ): + results.extend(result) - # get the data - X, y, metadata = self.paradigm.get_data( - dataset=dataset, - subjects=[subject], - return_epochs=self.return_epochs, - return_raws=self.return_raws, - ) - le = LabelEncoder() - y = y if self.mne_labels else le.fit_transform(y) - groups = metadata.session.values - scorer = get_scorer(self.paradigm.scoring) + return results - for name, clf in run_pipes.items(): - if _carbonfootprint: - # Initialise CodeCarbon - tracker = EmissionsTracker(save_to_file=False, log_level="error") - tracker.start() + def process_subject(self, subject, param_grid, pipelines, dataset): + # check if we already have result for this subject/pipeline + # we might need a better granularity, if we query the DB + run_pipes = self.results.not_yet_computed(pipelines, dataset, subject) + if len(run_pipes) == 0: + print(f"Subject {subject} already processed") + return [] - # we want to store a results per session - cv = LeaveOneGroupOut() + # get the data + X, y, metadata = self.paradigm.get_data( + dataset=dataset, + subjects=[subject], + return_epochs=self.return_epochs, + return_raws=self.return_raws, + ) + le = LabelEncoder() + y = y if self.mne_labels else le.fit_transform(y) + groups = metadata.session.values + scorer = get_scorer(self.paradigm.scoring) - grid_clf = clone(clf) + results = [] + for name, clf in run_pipes.items(): + if _carbonfootprint: + # Initialise CodeCarbon + tracker = EmissionsTracker(save_to_file=False, log_level="error") + tracker.start() - # Load result if the folder exist - name_grid = os.path.join( - str(self.hdf5_path), - "GridSearch_CrossSession", - dataset.code, - str(subject), - name, - ) + # we want to store a results per session + cv = LeaveOneGroupOut() - # Implement Grid Search - grid_clf = self._grid_search( - param_grid, name_grid, name, grid_clf, X, y, cv, groups - ) + grid_clf = clone(clf) - if _carbonfootprint: - emissions_grid = tracker.stop() - if emissions_grid is None: - emissions_grid = 0 + # Load result if the folder exist + name_grid = os.path.join( + str(self.hdf5_path), + "GridSearch_CrossSession", + dataset.code, + str(subject), + name, + ) - for train, test in cv.split(X, y, groups): - if _carbonfootprint: - tracker.start() - t_start = time() - if isinstance(X, BaseEpochs): - cvclf = clone(grid_clf) - cvclf.fit(X[train], y[train]) - score = scorer(cvclf, X[test], y[test]) - else: - result = _fit_and_score( - clone(grid_clf), - X, - y, - scorer, - train, - test, - verbose=False, - parameters=None, - fit_params=None, - error_score=self.error_score, - ) - score = result["test_scores"] - if _carbonfootprint: - emissions = tracker.stop() - if emissions is None: - emissions = 0 + # Implement Grid Search + grid_clf = self._grid_search( + 
param_grid, name_grid, name, grid_clf, X, y, cv, groups + ) - duration = time() - t_start - nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] - res = { - "time": duration, - "dataset": dataset, - "subject": subject, - "session": groups[test][0], - "score": score, - "n_samples": len(train), - "n_channels": nchan, - "pipeline": name, - } - if _carbonfootprint: - res["carbon_emission"] = (1000 * (emissions + emissions_grid),) + if _carbonfootprint: + emissions_grid = tracker.stop() + if emissions_grid is None: + emissions_grid = 0 - yield res + for train, test in cv.split(X, y, groups): + if _carbonfootprint: + tracker.start() + t_start = time() + if isinstance(X, BaseEpochs): + cvclf = clone(grid_clf) + cvclf.fit(X[train], y[train]) + score = scorer(cvclf, X[test], y[test]) + else: + result = _fit_and_score( + clone(grid_clf), + X, + y, + scorer, + train, + test, + verbose=False, + parameters=None, + fit_params=None, + error_score=self.error_score, + ) + score = result["test_scores"] + if _carbonfootprint: + emissions = tracker.stop() + if emissions is None: + emissions = 0 + + duration = time() - t_start + nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] + res = { + "time": duration, + "dataset": dataset, + "subject": subject, + "session": groups[test][0], + "score": score, + "n_samples": len(train), + "n_channels": nchan, + "pipeline": name, + } + if _carbonfootprint: + res["carbon_emission"] = (1000 * (emissions + emissions_grid),) + + results.append(res) + return results def is_valid(self, dataset): return dataset.n_sessions > 1 @@ -594,6 +625,9 @@ class CrossSubjectEvaluation(BaseEvaluation): If not None, can guarantee same seed for shuffling examples. n_jobs: int, default=1 Number of jobs for fitting of pipeline. + n_jobs_evaluation: int, default=1 + Number of jobs for evaluation, processing in parallel the within session, + cross-session or cross-subject. overwrite: bool, default=False If true, overwrite the results. 
error_score: "raise" or numeric, default="raise" From db92e510c29eb61414ae760054bb302a8dfa5661 Mon Sep 17 00:00:00 2001 From: Bru Date: Wed, 31 May 2023 18:51:35 +0200 Subject: [PATCH 03/64] Adding redudance deploy (#374) * Adding new deploy and updating whats_new.rst * Changing folder to doc * Update docs/source/whats_new.rst * Separating workflow * trying to fix the pre-commit issue * trying to fix the pre-commit issue * Changing Docs configuration * Updating the whats_new.rst --------- Co-authored-by: Sylvain Chevallier --- .github/workflows/docs.yml | 78 +++++--------------------------------- docs/source/whats_new.rst | 1 + 2 files changed, 11 insertions(+), 68 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 94aa9235c..6ed2cd9df 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -91,17 +91,6 @@ jobs: path: moabb-ghio token: ${{ secrets.MOABB_GHIO }} - - name: Deploy on moabb.neurotechx.com - run: | - git config --global user.email "ci@neurotechx.com" - git config --global user.name "Github Actions" - cd ~/work/moabb/moabb/moabb-ghio - rm -Rf docs - cp -a ~/work/moabb/moabb/docs/build/html ./docs - git add -A - git commit -m "GH Actions update of docs ($GITHUB_RUN_ID - $GITHUB_RUN_NUMBER)" - git push origin master - deploy_gh_pages: if: ${{ github.ref == 'refs/heads/develop' }} needs: build_docs @@ -133,6 +122,16 @@ jobs: ref: gh-pages path: moabb-ghpages + - name: Deploy Neurotechx Subpage + uses: peaceiris/actions-gh-pages@v3 + with: + deploy_key: ${{ secrets.ACTIONS_DEPLOY_KEY }} + external_repository: NeuroTechX/moabb.github.io + destination_dir: docs/ + publish_branch: master + publish_dir: ./docs/build/html + cname: moabb.neurotechx.com/ + - name: Deploy on gh-pages run: | git config --global user.email "ci@neurotechx.com" @@ -143,60 +142,3 @@ jobs: git add -A git commit -m "GH Actions update of GH pages ($GITHUB_RUN_ID - $GITHUB_RUN_NUMBER)" git push origin gh-pages - - - name: Deploy to moabb.neurotechx.com/ - if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/develop'}} - uses: peaceiris/actions-gh-pages@v3 - with: - deploy_key: ${{ secrets.ACTIONS_DEPLOY_KEY }} - external_repository: NeuroTechX/moabb.github.io - destination_dir: docs/ - publish_branch: master - publish_dir: ./docs/build/html - cname: moabb.neurotechx.com/ - - # Previous test with moabb GH pages, official docs point to moabb.github.io - ########################################################################### - # Since we want the URL to be neurotechx.github.io/docs/ the html output needs to be put in a ./docs subfolder of the publish_dir - # - name: Move docs into site folder - # run: | - # mkdir site - # mv docs/build/html site/docs - - # - name: Deploy on moabb gh-pages - # uses: peaceiris/actions-gh-pages@v3 - # if: github.ref == 'refs/heads/master' - # with: - # github_token: ${{ secrets.GITHUB_TOKEN }} - # publish_dir: site - - # Using checkout and push actions, not working - ############################################## - # - name: Install SSH key - # uses: shimataro/ssh-key-action@v2 - # with: - # key: ${{ secrets.MOABB_DOCS_SSH }} - # known_hosts: ${{ secrets.KNOWN_HOST_GH }} - - # - name: Checkout moabb.github.io - # uses: actions/checkout@v2 - # with: - # repository: "NeuroTechX/moabb.github.io" - # path: moabb-ghio - # fetch-depth: 0 - # persist-credentials: false - - # - name: Add html files - # run: | - # cd ~/work/moabb/moabb/moabb-ghio - # rm -Rf docs - # cp -a ~/work/moabb/moabb/docs/build/html ./docs - # git 
config --global user.email "ci@neurotechx.com" - # git config --global user.name "Github Actions" - # git commit -m "GH Actions update of docs ($GITHUB_RUN_ID - $GITHUB_RUN_NUMBER)" -a - - # - name: Push - # uses: ad-m/github-push-action@master - # with: - # github_token: ${{ secrets.MOABB_GHIO }} - # repository: "NeuroTechX/moabb.github.io" diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index e45ba4581..0acaa566d 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -18,6 +18,7 @@ Develop branch Enhancements ~~~~~~~~~~~~ +- Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) Bugs From e189e356ff4adb97d84c4bbdca248870d753b591 Mon Sep 17 00:00:00 2001 From: Sara Sedlar Date: Wed, 31 May 2023 23:03:47 +0200 Subject: [PATCH 04/64] Pipelines p300 (#377) * Add an example of comparison of classification of the ERP covariances without and with Xdawn spatial filtering using MDM and tangent space classifiers. * Fix description * [pre-commit.ci] auto fixes from pre-commit.com hooks * Adding P300 pipelines * [pre-commit.ci] auto fixes from pre-commit.com hooks * Update pipelines/xDAWN+LDA.yml Co-authored-by: Sylvain Chevallier * Rename pipeline script names * Replace Vectorizer class by one from mne. * [pre-commit.ci] auto fixes from pre-commit.com hooks * Remove imports of BaseEstimator, TransformerMixin * Remove import of numpy * Adding XdanCov + TangentSpace + SVM pipeline. * Remove test example * Change estimator to oas and xdawn_estimator to scm --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Bru Co-authored-by: Sylvain Chevallier --- .../plot_learning_curve_p300.py | 22 +------------- examples/plot_within_session_p300.py | 23 +-------------- pipelines/ERPCov_MDM.yml | 16 ++++++++++ pipelines/XdawnCov_MDM.yml | 18 ++++++++++++ pipelines/XdawnCov_TS_SVM.yml | 29 +++++++++++++++++++ pipelines/xDAWN_LDA.yml | 19 ++++++++++++ 6 files changed, 84 insertions(+), 43 deletions(-) create mode 100644 pipelines/ERPCov_MDM.yml create mode 100644 pipelines/XdawnCov_MDM.yml create mode 100644 pipelines/XdawnCov_TS_SVM.yml create mode 100644 pipelines/xDAWN_LDA.yml diff --git a/examples/learning_curve/plot_learning_curve_p300.py b/examples/learning_curve/plot_learning_curve_p300.py index 74260f0b5..7055b779b 100644 --- a/examples/learning_curve/plot_learning_curve_p300.py +++ b/examples/learning_curve/plot_learning_curve_p300.py @@ -24,10 +24,10 @@ import matplotlib.pyplot as plt import numpy as np import seaborn as sns +from mne.decoding import Vectorizer from pyriemann.estimation import XdawnCovariances from pyriemann.spatialfilters import Xdawn from pyriemann.tangentspace import TangentSpace -from sklearn.base import BaseEstimator, TransformerMixin from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA from sklearn.pipeline import make_pipeline @@ -44,26 +44,6 @@ moabb.set_log_level("info") -############################################################################## -# This is an auxiliary transformer that allows one to vectorize data -# structures in a pipeline For instance, in the case of a X with dimensions -# Nt x Nc x Ns, one might be interested in a new data structure with -# dimensions Nt x (Nc.Ns) - - -class Vectorizer(BaseEstimator, TransformerMixin): - def __init__(self): 
- pass - - def fit(self, X, y): - """fit.""" - return self - - def transform(self, X): - """transform.""" - return np.reshape(X, (X.shape[0], -1)) - - ############################################################################## # Create Pipelines # ---------------- diff --git a/examples/plot_within_session_p300.py b/examples/plot_within_session_p300.py index bff442f52..59c9face8 100644 --- a/examples/plot_within_session_p300.py +++ b/examples/plot_within_session_p300.py @@ -21,11 +21,10 @@ import warnings import matplotlib.pyplot as plt -import numpy as np import seaborn as sns +from mne.decoding import Vectorizer from pyriemann.estimation import Xdawn, XdawnCovariances from pyriemann.tangentspace import TangentSpace -from sklearn.base import BaseEstimator, TransformerMixin from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA from sklearn.pipeline import make_pipeline @@ -42,26 +41,6 @@ moabb.set_log_level("info") -############################################################################## -# This is an auxiliary transformer that allows one to vectorize data -# structures in a pipeline For instance, in the case of an X with dimensions -# Nt x Nc x Ns, one might be interested in a new data structure with -# dimensions Nt x (Nc.Ns) - - -class Vectorizer(BaseEstimator, TransformerMixin): - def __init__(self): - pass - - def fit(self, X, y): - """fit.""" - return self - - def transform(self, X): - """transform.""" - return np.reshape(X, (X.shape[0], -1)) - - ############################################################################## # Create Pipelines # ---------------- diff --git a/pipelines/ERPCov_MDM.yml b/pipelines/ERPCov_MDM.yml new file mode 100644 index 000000000..0a7daf42f --- /dev/null +++ b/pipelines/ERPCov_MDM.yml @@ -0,0 +1,16 @@ +name: ERPCovariances + MDM + +paradigms: + - P300 + +citations: + - https://doi.org/10.48550/arXiv.1409.0107 + +pipeline: + - name: ERPCovariances + from: pyriemann.estimation + parameters: + estimator: oas + + - name: MDM + from: pyriemann.classification diff --git a/pipelines/XdawnCov_MDM.yml b/pipelines/XdawnCov_MDM.yml new file mode 100644 index 000000000..0812df978 --- /dev/null +++ b/pipelines/XdawnCov_MDM.yml @@ -0,0 +1,18 @@ +name: XdawnCovariances + MDM + +paradigms: + - P300 + +citations: + - https://doi.org/10.1109/NER49283.2021.9441279 + - https://citeseerx.ist.psu.edu/doc/10.1.1.713.5131 + +pipeline: + - name: XdawnCovariances + from: pyriemann.estimation + parameters: + estimator: oas + xdawn_estimator: scm + + - name: MDM + from: pyriemann.classification diff --git a/pipelines/XdawnCov_TS_SVM.yml b/pipelines/XdawnCov_TS_SVM.yml new file mode 100644 index 000000000..28fe35125 --- /dev/null +++ b/pipelines/XdawnCov_TS_SVM.yml @@ -0,0 +1,29 @@ +name: XdawnCovariances + TangentSpace + SVM + +paradigms: + - P300 + +citations: + - https://dx.doi.org/10.1007/978-3-030-01424-7_65 + +pipeline: + - name: XdawnCovariances + from: pyriemann.estimation + parameters: + estimator: oas + xdawn_estimator: scm + + - name: TangentSpace + from: pyriemann.tangentspace + + - name: SVC + from: sklearn.svm + +param_grid: + svc__C: + - 0.1 + - 1.0 + - 10 + svc__kernel: + - "rbf" + - "linear" diff --git a/pipelines/xDAWN_LDA.yml b/pipelines/xDAWN_LDA.yml new file mode 100644 index 000000000..f230287e0 --- /dev/null +++ b/pipelines/xDAWN_LDA.yml @@ -0,0 +1,19 @@ +name: xDAWN + LDA + +paradigms: + - P300 + +citations: + - https://doi.org/10.1109/TBME.2009.2012869 + +pipeline: + - name: Xdawn + from: pyriemann.spatialfilters + parameters: 
+ estimator: oas + + - name: Vectorizer + from: mne.decoding + + - name: LinearDiscriminantAnalysis + from: sklearn.discriminant_analysis From f153cf439f8504b43992a81b4b6e4555203f44c5 Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Fri, 2 Jun 2023 14:00:24 +0200 Subject: [PATCH 05/64] Rename event in Shin2017B - Fixes #388 (#397) * Rename event in Shin2017B - Fixes #388 * Update whats_new.rst --- docs/source/whats_new.rst | 1 + moabb/datasets/bbci_eeg_fnirs.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 0acaa566d..5880c89bf 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -25,6 +25,7 @@ Bugs ~~~~ - Restore 3 subject from Cho2017 (:gh:`392` by `Igor Carrara`_ and `Sylvain Chevallier`_) +- Rename event `substraction` to `subtraction` in :func:`moabb.dataset.Shin2017B` (:gh:`397` by `Pierre Guetschel`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/bbci_eeg_fnirs.py b/moabb/datasets/bbci_eeg_fnirs.py index 5554ed4eb..56e771d63 100644 --- a/moabb/datasets/bbci_eeg_fnirs.py +++ b/moabb/datasets/bbci_eeg_fnirs.py @@ -97,7 +97,7 @@ def __init__( n_sessions += 3 if mental_arithmetic: - events.update(dict(substraction=3, rest=4)) + events.update(dict(subtraction=3, rest=4)) paradigms.append("arithmetic") n_sessions += 3 From 2938fcc6bd45a7ac3a56c8638b0721c33aac2720 Mon Sep 17 00:00:00 2001 From: Bru Date: Fri, 2 Jun 2023 16:17:59 +0200 Subject: [PATCH 06/64] Update dataset info (#389) * Fixing data meta info * Fixing description Cho and BNCI * Fixing order * Fixing the SSVEP and P300 * Adding new script * Fixing saving * Updating script * Fixing columns * Updating the script to process trial/events * Updating the whats_new.rst * Update docs/source/dataset_summary.rst Co-authored-by: Sylvain Chevallier * Update docs/source/dataset_summary.rst Co-authored-by: Sylvain Chevallier * Update docs/source/dataset_summary.rst Co-authored-by: Sylvain Chevallier * Update moabb/datasets/gigadb.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Update scripts/generating_metainfo.py Co-authored-by: Sylvain Chevallier * Fixing small things 
--------- Co-authored-by: Sylvain Chevallier --- docs/source/dataset_summary.rst | 46 ++++---- docs/source/whats_new.rst | 2 + moabb/datasets/Lee2019.py | 2 +- moabb/datasets/bnci.py | 10 +- moabb/datasets/braininvaders.py | 6 +- moabb/datasets/gigadb.py | 2 +- moabb/datasets/ssvep_wang.py | 2 +- scripts/generating_metainfo.py | 194 ++++++++++++++++++++++++++++++++ 8 files changed, 230 insertions(+), 34 deletions(-) create mode 100644 scripts/generating_metainfo.py diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst index 5e80f2bae..3b47ad898 100644 --- a/docs/source/dataset_summary.rst +++ b/docs/source/dataset_summary.rst @@ -17,25 +17,25 @@ Motor Imagery ====================== .. csv-table:: - :header: Dataset, #Subj, #Chan, #Classes, #Trials, len, Sampling rate, #Sessions, #Trials*#Sessions + :header: Dataset, #Subj, #Chan, #Classes, #Trials, Trial length, Freq, #Session, #Runs, Total_trials :class: sortable - AlexMI,8,16,3,20,3s,512Hz,1,20 - BNCI2014001,10,22,4,144,4s,250Hz,2,288 - BNCI2014002,15,15,2,80,5s,512Hz,1,80 - BNCI2014004,10,3,2,360,4.5s,250Hz,5,1800 - BNCI2015001,13,13,2,200,5s,512Hz,2,400 - BNCI2015004,10,30,5,80,7s,256Hz,2,160 - Cho2017,53,64,2,100,3s,512Hz,1,100 - Lee2019_MI,55,62,2,100,4s,1000Hz,2,200 - MunichMI,10,128,2,150,7s,500Hz,1,150 - Schirrmeister2017,14,128,4,120,4s,500Hz,1,120 - Ofner2017,15,61,7,60,3s,512Hz,1,60 - PhysionetMI,109,64,4,23,3s,160Hz,1,23 - Shin2017A,29,30,2,30,10s,200Hz,3,90 - Shin2017B,29,30,2,30,10s,200Hz,3,90 - Weibo2014,10,60,7,80,4s,200Hz,1,80 - Zhou2016,4,14,3,160,5s,250Hz,3,480 + AlexMI,8,16,3,20,3s,512Hz,1,1,480 + BNCI2014001,9,22,4,144,4s,250Hz,2,6,62208 + BNCI2014002,14,15,2,80,5s,512Hz,1,8,17920 + BNCI2014004,9,3,2,360,4.5s,250Hz,5,1,32400 + BNCI2015001,12,13,2,200,5s,512Hz,3,1,14400 + BNCI2015004,9,30,5,80,7s,256Hz,2,1,7200 + Cho2017,52,64,2,100,3s,512Hz,1,1,9800 + Lee2019_MI,55,62,2,100,4s,1000Hz,2,1,11000 + MunichMI,10,128,2,150,7s,500Hz,1,1,3000 + Schirrmeister2017,14,128,4,120,4s,500Hz,1,2,13440 + Ofner2017,15,61,7,60,3s,512Hz,1,10,63000 + PhysionetMI,109,64,4,23,3s,160Hz,1,1,69760 + Shin2017A,29,30,2,30,10s,200Hz,3,1,5220 + Shin2017B,29,30,2,30,10s,200Hz,3,1,5220 + Weibo2014,10,60,7,80,4s,200Hz,1,1,5600 + Zhou2016,4,14,3,160,5s,250Hz,3,2,11496 P300/ERP ====================== @@ -49,10 +49,10 @@ P300/ERP BNCI2015003, 10, 8, 1500 NT / 300 T, 0.8s, 256Hz, 1 bi2012, 25, 16, 6140 NT / 128 T, 1s, 512Hz, 2 bi2013a, 24, 16, 3200 NT / 640 T, 1s, 512Hz, 8 for subjects 1-7 else 1 - bi2014a, 71, 16, , 1s, 512Hz, up to 3 - bi2014b, 38, 32, , 1s, 512Hz, 3 - bi2015a, 50, 32, , 1s, 512Hz, 3 - bi2015b, 44, 32, , 1s, 512Hz, 2 + bi2014a, 64, 16, 990 NT / 198 T, 1s, 512Hz, up to 3 + bi2014b, 37, 32, 200 NT / 40 T, 1s, 512Hz, 3 + bi2015a, 43, 32, 4131 NT / 825 T, 1s, 512Hz, 3 + bi2015b, 44, 32, 2160 NT / 480 T, 1s, 512Hz, 2 VirtualReality, 24, 16, 600 NT / 120 T, 1s, 512Hz, 2 Huebner2017, 13, 31, , 0.9s, 1000Hz, 1 Huebner2018, 12, 31, , 0.9s, 1000Hz, 1 @@ -68,13 +68,13 @@ SSVEP :header: Dataset, #Subj, #Chan, #Classes, #Trials / class, Trials length, Sampling rate, #Sessions :class: sortable - Lee2019_SSVEP,24,16,4,25,1s,1000Hz,1 + Lee2019_SSVEP,54,16,4,25,1s,1000Hz,1 SSVEPExo,12,8,4,16,2s,256Hz,1 MAMEM1,10,256,5,12-15,3s,250Hz,1 MAMEM2,10,256,5,20-30,3s,250Hz,1 MAMEM3,10,14,4,20-30,3s,128Hz,1 Nakanishi2015,9,8,12,15,4.15s,256Hz,1 - Wang2016,32,62,40,6,5s,250Hz,1 + Wang2016,34,62,40,6,5s,250Hz,1 diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 5880c89bf..b08689aaf 100644 --- 
a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -18,6 +18,8 @@ Develop branch Enhancements ~~~~~~~~~~~~ +- Adding new script to get the meta information of the datasets (:gh:`389` by `Bruno Aristimunha`_) +- Fixing the dataset description based on the meta information (:gh:`389` by `Bruno Aristimunha`_) - Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) diff --git a/moabb/datasets/Lee2019.py b/moabb/datasets/Lee2019.py index ae3e9c667..d4e35fbd2 100644 --- a/moabb/datasets/Lee2019.py +++ b/moabb/datasets/Lee2019.py @@ -395,7 +395,7 @@ class Lee2019_SSVEP(Lee2019): ============= ======= ======= ========== ================= =============== =============== =========== Name #Subj #Chan #Classes #Trials / class Trials length Sampling rate #Sessions ============= ======= ======= ========== ================= =============== =============== =========== - Lee2019_SSVEP 24 16 4 25 1s 1000Hz 1 + Lee2019_SSVEP 54 16 4 25 1s 1000Hz 1 ============= ======= ======= ========== ================= =============== =============== =========== Dataset from Lee et al 2019 [1]_. diff --git a/moabb/datasets/bnci.py b/moabb/datasets/bnci.py index 81a3b40ce..51b4bdbb9 100644 --- a/moabb/datasets/bnci.py +++ b/moabb/datasets/bnci.py @@ -686,7 +686,7 @@ class BNCI2014001(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014001 10 22 4 144 4s 250Hz 2 + BNCI2014001 9 22 4 144 4s 250Hz 2 =========== ======= ======= ========== ================= ============ =============== =========== Dataset IIa from BCI Competition 4 [1]_. @@ -750,7 +750,7 @@ class BNCI2014002(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014002 15 15 2 80 5s 512Hz 1 + BNCI2014002 14 15 2 80 5s 512Hz 1 =========== ======= ======= ========== ================= ============ =============== =========== Motor Imagery Dataset from [1]_. @@ -813,7 +813,7 @@ class BNCI2014004(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014004 10 3 2 360 4.5s 250Hz 5 + BNCI2014004 9 3 2 360 4.5s 250Hz 5 =========== ======= ======= ========== ================= ============ =============== =========== Dataset B from BCI Competition 2008. @@ -1030,7 +1030,7 @@ class BNCI2015001(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2015001 13 13 2 200 5s 512Hz 2 + BNCI2015001 12 13 2 200 5s 512Hz 2 =========== ======= ======= ========== ================= ============ =============== =========== Dataset from [1]_. 
@@ -1131,7 +1131,7 @@ class BNCI2015004(MNEBNCI): =========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2015004 10 30 5 80 7s 256Hz 2 + BNCI2015004 9 30 5 80 7s 256Hz 2 =========== ======= ======= ========== ================= ============ =============== =========== Dataset from [1]_. diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py index 42d01ee95..647281287 100644 --- a/moabb/datasets/braininvaders.py +++ b/moabb/datasets/braininvaders.py @@ -576,7 +576,7 @@ class bi2014a(BaseDataset): ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2014a 71 16 5 NT x 1 T 1s 512Hz up to 3 + bi2014a 64 16 5 NT x 1 T 1s 512Hz up to 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings of 71 subjects @@ -634,7 +634,7 @@ class bi2014b(BaseDataset): ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2014b 38 32 5 NT x 1 T 1s 512Hz 3 + bi2014b 37 32 5 NT x 1 T 1s 512Hz 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings of 38 subjects playing in @@ -693,7 +693,7 @@ class bi2015a(BaseDataset): ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2015a 50 32 5 NT x 1 T 1s 512Hz 3 + bi2015a 43 32 5 NT x 1 T 1s 512Hz 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings diff --git a/moabb/datasets/gigadb.py b/moabb/datasets/gigadb.py index cd1002f53..cccaa015e 100644 --- a/moabb/datasets/gigadb.py +++ b/moabb/datasets/gigadb.py @@ -27,7 +27,7 @@ class Cho2017(BaseDataset): ======= ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions ======= ======= ======= ========== ================= ============ =============== =========== - Cho2017 53 64 2 100 3s 512Hz 1 + Cho2017 52 64 2 100 3s 512Hz 1 ======= ======= ======= ========== ================= ============ =============== =========== Dataset from the paper [1]_. 
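As an editorial aside (not part of this patch): the corrected numbers in the dataset summary table above can be spot-checked with a few lines of MOABB/MNE code, in the same spirit as the scripts/generating_metainfo.py script added later in this patch. This is only a sketch; it assumes a working MOABB installation and will download data for the first Cho2017 subject when run.

import mne
from moabb.datasets import Cho2017
from moabb.paradigms import LeftRightImagery

# Instantiate the dataset and the paradigm used to epoch it
dataset = Cho2017()
paradigm = LeftRightImagery()

# Per the updated summary table, Cho2017 should now expose 52 subjects
print(len(dataset.subject_list))

# Load one subject as MNE Epochs and report channel count, sampling rate
# and trials per class, as generating_metainfo.py does for every dataset
epochs, _, _ = paradigm.get_data(dataset, [dataset.subject_list[0]], return_epochs=True)
print(epochs.info["nchan"], epochs.info["sfreq"])
print(mne.count_events(epochs.events))
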
diff --git a/moabb/datasets/ssvep_wang.py b/moabb/datasets/ssvep_wang.py index cc114ed86..8c25e4332 100644 --- a/moabb/datasets/ssvep_wang.py +++ b/moabb/datasets/ssvep_wang.py @@ -30,7 +30,7 @@ class Wang2016(BaseDataset): ======== ======= ======= ========== ================= =============== =============== =========== Name #Subj #Chan #Classes #Trials / class Trials length Sampling rate #Sessions ======== ======= ======= ========== ================= =============== =============== =========== - Wang2016 32 62 40 6 5s 250Hz 1 + Wang2016 34 62 40 6 5s 250Hz 1 ======== ======= ======= ========== ================= =============== =============== =========== Dataset from [1]_. diff --git a/scripts/generating_metainfo.py b/scripts/generating_metainfo.py new file mode 100644 index 000000000..5cbf44765 --- /dev/null +++ b/scripts/generating_metainfo.py @@ -0,0 +1,194 @@ +from argparse import ArgumentParser +from pathlib import Path + +import mne +import numpy as np +import pandas as pd + +import moabb +from moabb.datasets.utils import dataset_search +from moabb.utils import set_download_dir + + +columns_name = [ + "Dataset", + "#Subj", + "#Chan", + "#Classes", + "trials/events", + "Window Size (s)", + "Freq (Hz)", + "#Session", + "#Runs", + "Total_trials", +] + + +def parser_init(): + parser = ArgumentParser(description="Getting the meta-information script for MOABB") + + parser.add_argument( + "-mne_p", + "--mne_data", + dest="mne_data", + default=Path.home() / "mne_data", + type=Path, + help="Folder where to save and load the datasets with mne structure.", + ) + + return parser + + +def process_trial_freq(trials_per_events, prdgm): + """ + Function to process the trial frequency. + Getting the median value if the paradigm is MotorImagery. + + Parameters + ---------- + trials_per_events: dict + prdgm: str + + Returns + ------- + trial_freq: str + """ + class_per_trial = list(trials_per_events.values()) + + if prdgm == "imagery" or prdgm == "ssvep": + return f"{int(np.median(class_per_trial))}" + elif prdgm == "p300": + not_target = max(trials_per_events.values()) + target = min(trials_per_events.values()) + return f"NT{not_target} / T {target}" + + +def get_meta_info(dataset, dataset_name, paradigm, prdgm_name): + """ + Function to get the meta-information of a dataset. 
+ + Parameters + ---------- + dataset: BaseDataset + Dataset object + dataset_name: str + Dataset name + paradigm: BaseParadigm + Paradigm object to process the dataset + prdgm_name: str + Paradigm name + + Returns + ------- + + """ + subjects = len(dataset.subject_list) + session = dataset.n_sessions + + X, _, metadata = paradigm.get_data(dataset, [1], return_epochs=True) + + sfreq = int(X.info["sfreq"]) + nchan = X.info["nchan"] + runs = len(metadata["run"].unique()) + classes = len(X.event_id) + epoch_size = X.tmax - X.tmin + + trials_per_events = mne.count_events(X.events) + total_trials = int(sum(trials_per_events.values())) + trial_class = process_trial_freq(trials_per_events, prdgm_name) + + info_dataset = pd.Series( + [ + dataset_name, + subjects, + nchan, + classes, + trial_class, + epoch_size, + sfreq, + session, + runs, + session * runs * total_trials * subjects, + ], + index=columns_name, + ) + + return info_dataset + + +if __name__ == "__main__": + mne.set_log_level(False) + + parser = parser_init() + options = parser.parse_args() + mne_path = Path(options.mne_data) + + set_download_dir(mne_path) + + paradigms = {} + paradigms["imagery"] = moabb.paradigms.MotorImagery() + paradigms["ssvep"] = moabb.paradigms.SSVEP() + paradigms["p300"] = moabb.paradigms.P300() + + for prdgm_name, paradigm in paradigms.items(): + dataset_list = dataset_search(paradigm=prdgm_name) + + metainfo = [] + for dataset in dataset_list: + dataset_name = str(dataset).split(".")[-1].split(" ")[0] + + dataset_path = f"{mne_path.parent}/metainfo/metainfo_{dataset_name}.csv" + + if not dataset_path.exists(): + print( + "Trying to get the meta information from the " + f"dataset {dataset} with {prdgm_name}" + ) + + try: + info_dataset = get_meta_info( + dataset, dataset_name, paradigm, prdgm_name + ) + print( + "Saving the meta information for the dataset in the file: ", + dataset_path, + ) + info_dataset.to_csv(dataset_path) + metainfo.append(info_dataset) + + except Exception as ex: + print(f"Error with {dataset} with {prdgm_name} paradigm", end=" ") + print(f"Error: {ex}") + + if prdgm_name == "imagery": + print("Trying with the LeftRightImagery paradigm") + prdgm2 = moabb.paradigms.LeftRightImagery() + try: + info_dataset = get_meta_info( + dataset, dataset_name, prdgm2, prdgm_name + ) + print( + "Saving the meta information for the dataset in the file: ", + dataset_path, + ) + info_dataset.to_csv(dataset_path) + metainfo.append(info_dataset) + + except Exception as ex: + print( + f"Error with {dataset} with {prdgm_name} paradigm", + end=" ", + ) + print(f"Error: {ex}") + else: + print(f"Loading the meta information from {dataset_path}") + info_dataset = pd.read_csv(dataset_path) + metainfo.append(info_dataset) + + paradigm_df = pd.concat(metainfo, axis=1).T + + paradigm_df.columns = columns_name + dataset_path = mne_path.parent / "metainfo" / f"metainfo_{dataset_name}.csv" + print(f"Saving the meta information for the paradigm {dataset_path}") + + paradigm_df.to_csv(dataset_path, index=None) From 4649f73a96b0497e9da4ac12f24c371869689346 Mon Sep 17 00:00:00 2001 From: Bru Date: Wed, 7 Jun 2023 13:35:07 +0200 Subject: [PATCH 07/64] Updating README.md (#390) * Updating README.md * Updating whats_new.rst * Updating README --------- Co-authored-by: Sylvain Chevallier --- docs/source/README.md | 14 ++++++++++++++ docs/source/whats_new.rst | 2 ++ 2 files changed, 16 insertions(+) diff --git a/docs/source/README.md b/docs/source/README.md index b90af6b20..8142dfad2 100644 --- a/docs/source/README.md +++ 
b/docs/source/README.md @@ -146,6 +146,20 @@ fetch(endpoint)
+Special acknowledge for the extra MOABB contributors:
+
+- Pedro L. C. Rodrigues
+ ### What do we need? **You**! In whatever way you can help. diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index b08689aaf..e4913d9a6 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -18,6 +18,8 @@ Develop branch Enhancements ~~~~~~~~~~~~ + +- Adding extra thank you section in the documentation (:gh:`390` by `Bruno Aristimunha`_) - Adding new script to get the meta information of the datasets (:gh:`389` by `Bruno Aristimunha`_) - Fixing the dataset description based on the meta information (:gh:`389` by `Bruno Aristimunha`_) - Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) From 3375c7e89cdd73fabfbfaa66ab2f886a9e312753 Mon Sep 17 00:00:00 2001 From: Bru Date: Wed, 7 Jun 2023 16:43:01 +0200 Subject: [PATCH 08/64] Update dataset descript (#398) * Updating README.md * Updating the data description * Updating data description and class dataset info * Updating whats_new file * Updating whats_new file * Fixing session --- docs/source/dataset_summary.rst | 7 ++++--- docs/source/whats_new.rst | 4 ++-- moabb/datasets/huebner_llp.py | 4 ++-- moabb/datasets/sosulski2019.py | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst index 3b47ad898..0d12df98d 100644 --- a/docs/source/dataset_summary.rst +++ b/docs/source/dataset_summary.rst @@ -54,12 +54,13 @@ P300/ERP bi2015a, 43, 32, 4131 NT / 825 T, 1s, 512Hz, 3 bi2015b, 44, 32, 2160 NT / 480 T, 1s, 512Hz, 2 VirtualReality, 24, 16, 600 NT / 120 T, 1s, 512Hz, 2 - Huebner2017, 13, 31, , 0.9s, 1000Hz, 1 - Huebner2018, 12, 31, , 0.9s, 1000Hz, 1 - Sosulski2019, 13, 32, 75 NT / 15 T, , 1000Hz, 1 + Huebner2017, 13, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 + Huebner2018, 12, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 + Sosulski2019, 13, 31, 75 NT / 15 T, , 1000Hz, 3 EPFLP300, 8, 32, 2753 NT / 551 T, 1s, 2048Hz, 4 Lee2019_ERP, 54, 62, 6900 NT / 1380 T, 1s, 1000Hz, 2 + SSVEP ====================== diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index e4913d9a6..e9c1d25e9 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -21,7 +21,7 @@ Enhancements - Adding extra thank you section in the documentation (:gh:`390` by `Bruno Aristimunha`_) - Adding new script to get the meta information of the datasets (:gh:`389` by `Bruno Aristimunha`_) -- Fixing the dataset description based on the meta information (:gh:`389` by `Bruno Aristimunha`_) +- Fixing the dataset description based on the meta information (:gh:`389` and `398` by `Bruno Aristimunha`_ and `Sara Sedlar`_) - Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) @@ -317,7 +317,7 @@ Bugs API changes ~~~~~~~~~~~ - None - +.. _Sara Sedlar: https://github.com/Sara04 .. _Emmanuel Kalunga: https://github.com/emmanuelkalunga .. _Gregoire Cattan: https://github.com/gcattan .. 
_Igor Carrara: https://github.com/carraraig diff --git a/moabb/datasets/huebner_llp.py b/moabb/datasets/huebner_llp.py index be04eecc3..60f5e369c 100644 --- a/moabb/datasets/huebner_llp.py +++ b/moabb/datasets/huebner_llp.py @@ -114,7 +114,7 @@ class Huebner2017(_BaseVisualMatrixSpellerDataset): =========== ======= ======= ================= =============== =============== =========== Name #Subj #Chan #Trials / class Trials length Sampling rate #Sessions =========== ======= ======= ================= =============== =============== =========== - Huebner2017 13 31 0.9s 1000Hz 1 + Huebner2017 13 31 364 NT / 112 T 0.9s 1000Hz 3 =========== ======= ======= ================= =============== =============== =========== **Dataset description** @@ -181,7 +181,7 @@ class Huebner2018(_BaseVisualMatrixSpellerDataset): =========== ======= ======= ================= =============== =============== =========== Name #Subj #Chan #Trials / class Trials length Sampling rate #Sessions =========== ======= ======= ================= =============== =============== =========== - Huebner2018 12 31 0.9s 1000Hz 1 + Huebner2018 12 31 364 NT / 112 T 0.9s 1000Hz 3 =========== ======= ======= ================= =============== =============== =========== **Dataset description** diff --git a/moabb/datasets/sosulski2019.py b/moabb/datasets/sosulski2019.py index df2408324..fdb300958 100644 --- a/moabb/datasets/sosulski2019.py +++ b/moabb/datasets/sosulski2019.py @@ -25,7 +25,7 @@ class Sosulski2019(BaseDataset): ============= ======= ======= ================= =============== =============== =========== Name #Subj #Chan #Trials / class Trials length Sampling rate #Sessions ============= ======= ======= ================= =============== =============== =========== - Sosulski2019 13 32 75 NT / 15 T 1000Hz 1 + Sosulski2019 13 31 75 NT / 15 T 1000Hz 3 ============= ======= ======= ================= =============== =============== =========== **Dataset description** From 7559bca75e74523e9b067fced2995ba279ad7914 Mon Sep 17 00:00:00 2001 From: gcattan Date: Wed, 7 Jun 2023 22:12:04 +0200 Subject: [PATCH 09/64] [Example] P300-VR dataset (#393) * In some places, the virtual reality dataset code was wrong. * fix: PC data not downloading. 
fix: inversion 12 blocks of 5 repetitions * push example from Pedro * fix error with datframe initialization * [pre-commit.ci] auto fixes from pre-commit.com hooks * add whats new * add test * [pre-commit.ci] auto fixes from pre-commit.com hooks * fix pytest/unittest * [pre-commit.ci] auto fixes from pre-commit.com hooks * replace logging by warnings library * move docstring to the top * [pre-commit.ci] auto fixes from pre-commit.com hooks * test completed * [pre-commit.ci] auto fixes from pre-commit.com hooks * leftover * typo >< * Update examples/vr_pc_p300_different_epoch_size.py Co-authored-by: Sylvain Chevallier * rename into plot_vr_pc_p300_different_epoch_size.py * - Add figure plot - add comments * [pre-commit.ci] auto fixes from pre-commit.com hooks * Update plot_vr_pc_p300_different_epoch_size.py * [pre-commit.ci] auto fixes from pre-commit.com hooks --------- Co-authored-by: Gregoire Cattan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Bru Co-authored-by: Sylvain Chevallier --- docs/source/whats_new.rst | 2 + .../plot_vr_pc_p300_different_epoch_size.py | 147 ++++++++++++++++++ moabb/datasets/braininvaders.py | 28 ++-- moabb/tests/datasets.py | 11 ++ 4 files changed, 177 insertions(+), 11 deletions(-) create mode 100644 examples/plot_vr_pc_p300_different_epoch_size.py diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index e9c1d25e9..a86b42834 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -24,11 +24,13 @@ Enhancements - Fixing the dataset description based on the meta information (:gh:`389` and `398` by `Bruno Aristimunha`_ and `Sara Sedlar`_) - Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) +- Add example with VirtualReality BrainInvaders dataset (:gh:`393` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) Bugs ~~~~ - Restore 3 subject from Cho2017 (:gh:`392` by `Igor Carrara`_ and `Sylvain Chevallier`_) +- Correct downloading with VirtualReality BrainInvaders dataset (:gh:`393` by `Gregoire Cattan`_) - Rename event `substraction` to `subtraction` in :func:`moabb.dataset.Shin2017B` (:gh:`397` by `Pierre Guetschel`_) API changes diff --git a/examples/plot_vr_pc_p300_different_epoch_size.py b/examples/plot_vr_pc_p300_different_epoch_size.py new file mode 100644 index 000000000..236f2bd84 --- /dev/null +++ b/examples/plot_vr_pc_p300_different_epoch_size.py @@ -0,0 +1,147 @@ +""" +============================= +Changing epoch size in P300 VR dataset +============================= + +This example shows how to extract the epochs from the P300-VR dataset of a given +subject and then classify them using Riemannian Geometry framework for BCI. +We compare the scores in the VR and PC conditions, using different epoch size. + +This example demonstrates the use of `get_block_repetition`, which allows +to specify the experimental blocks and repetitions for analysis. 
+""" +# Authors: Pedro Rodrigues +# Modified by: Gregoire Cattan +# License: BSD (3-clause) + +import warnings + +import numpy as np +import pandas as pd +from pyriemann.classification import MDM +from pyriemann.estimation import ERPCovariances +from sklearn.metrics import roc_auc_score +from sklearn.model_selection import KFold +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import LabelEncoder +from tqdm import tqdm + +from moabb.datasets import VirtualReality +from moabb.paradigms import P300 + + +warnings.filterwarnings("ignore") + +############################################################################### +# Initialization +# --------------- +# +# 1) Create an instance of the dataset. +# 2) Create an instance of a P300 paradigm. +# By default filtering between 1-24 Hz +# with epochs of length 1s. +# In this example we will be modifying the length of the epochs, by +# changing the `tmax` attribute of the paradigm. +# 3) Encode categorical variable (Target/NonTarget) to numerical values. +# We will be using label encoding. + +dataset = VirtualReality() +paradigm = P300() +le = LabelEncoder().fit(["Target", "NonTarget"]) + +# change this to include more subjects +nsubjects = 2 + +############################################################################### +# Validation +# --------------- +# +# We will perform a 3-folds validation for each combination of +# tmax, subjects and experimental conditions (VR or PC). +# +# Not all the data will be used for this validation. +# The VirtualReality dataset contains the data from a randomized experiment. +# We will only be using the two first repetitions of the 12 experimental blocks. +# Data will be selected thanks to the `get_block_repetition` method. + +# Contains the score for all combination of tmax, subjects +# and experimental condition (VR or PC). +scores = [] + +# Init 3-folds validation. +kf = KFold(n_splits=3) + +# Select the first two repetitions. +repetitions = [1, 2] + +# Generate all possible arrangement with the 12 blocks. +blocks = np.arange(1, 12 + 1) + +# run validation for each combination. +for tmax in [0.2, 1.0]: + paradigm.tmax = tmax + + for subject in tqdm(dataset.subject_list[:nsubjects]): + # Note: here we are adding `tmax` to scores_subject, + # although `tmax` is defined outside the scope of this inner loop. + # The reason behind is to facilitate the conversion from array to dataframe at the end. + scores_subject = [tmax, subject] + + for condition in ["VR", "PC"]: + print(f"subject {subject}, {condition}, tmax {tmax}") + + # Rather than creating a new instance depending on the condition, + # let's change the attribute value to download the correct data. + dataset.virtual_reality = condition == "VR" + dataset.personal_computer = condition == "PC" + + auc = [] + + # Split in training and testing blocks, and fit/predict. + # This loop will run 3 times as we are using a 3-folds validation + for train_idx, test_idx in kf.split(np.arange(12)): + # Note the use of the `get_block_repetition` method, + # to select the appropriate number of blocks and repetitions: + # - 8 blocks for training, 4 for testing + # - only the first two repetitions inside each blocks + X_train, y_train, _ = dataset.get_block_repetition( + paradigm, [subject], blocks[train_idx], repetitions + ) + + X_test, y_test, _ = dataset.get_block_repetition( + paradigm, [subject], blocks[test_idx], repetitions + ) + + # We use riemannian geometry processing technics with MDM algorithm. 
+ pipe = make_pipeline(ERPCovariances(estimator="lwf"), MDM()) + pipe.fit(X_train, y_train) + y_pred = pipe.predict(X_test) + + # y_test and y_pred contains categorical variable (Target/NonTarget). + # To use a metric, we need to convert target information to numerical values. + y_test = le.transform(y_test) + y_pred = le.transform(y_pred) + + # We use the roc_auc_score, which is a reliable metric for multi-class problem. + auc.append(roc_auc_score(y_test, y_pred)) + + # stock scores + scores_subject.append(np.mean(auc)) + + scores.append(scores_subject) + +############################################################################### +# Display of the data +# --------------- +# +# Let's transform or array to a dataframe. +# We can then print it on the console, and +# plot the mean AUC as a function of the epoch length. + +df = pd.DataFrame(scores, columns=["tmax", "subject", "VR", "PC"]) + +print(df) + +df.groupby("tmax").mean().plot( + y=["VR", "PC"], title="Mean AUC as a function of the epoch length" +) diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py index 647281287..d32787912 100644 --- a/moabb/datasets/braininvaders.py +++ b/moabb/datasets/braininvaders.py @@ -4,6 +4,7 @@ import shutil import zipfile as z from distutils.dir_util import copy_tree +from warnings import warn import mne import numpy as np @@ -150,7 +151,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 stim[idx_nontarget] = 1 X = np.concatenate([S, stim[None, :]]) sfreq = 512 - elif ds.code == "Virtual Reality dataset": + elif ds.code == "P300-VR": data = loadmat(os.path.join(file_path, os.listdir(file_path)[0]))["data"] chnames = [ @@ -187,7 +188,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 verbose=False, ) - if not ds.code == "Virtual Reality dataset": + if not ds.code == "P300-VR": raw = mne.io.RawArray(data=X, info=info, verbose=False) raw.set_montage(make_standard_montage("standard_1020")) @@ -388,15 +389,16 @@ def _bi_data_path( # noqa: C901 ) for i in range(1, 5) ] - elif ds.code == "Virtual Reality dataset": + elif ds.code == "P300-VR": subject_paths = [] - url = "{:s}subject_{:02d}_{:s}.mat".format( - VIRTUALREALITY_URL, - subject, - "VR" if ds.virtual_reality else ds.personal_computer, - ) - file_path = dl.data_path(url, "VIRTUALREALITY") - subject_paths.append(file_path) + if ds.virtual_reality: + url = "{:s}subject_{:02d}_{:s}.mat".format(VIRTUALREALITY_URL, subject, "VR") + file_path = dl.data_path(url, "VIRTUALREALITY") + subject_paths.append(file_path) + if ds.personal_computer: + url = "{:s}subject_{:02d}_{:s}.mat".format(VIRTUALREALITY_URL, subject, "PC") + file_path = dl.data_path(url, "VIRTUALREALITY") + subject_paths.append(file_path) return subject_paths @@ -868,6 +870,10 @@ def __init__(self, virtual_reality=False, screen_display=True): self.virtual_reality = virtual_reality self.personal_computer = screen_display + if not self.virtual_reality and not self.personal_computer: + warn( + "[P300-VR dataset] virtual_reality and screen display are False. No data will be downloaded, unless you change these parameters after initialization." + ) def _get_single_subject_data(self, subject): """return data for a single subject""" @@ -880,7 +886,7 @@ def data_path( def get_block_repetition(self, paradigm, subjects, block_list, repetition_list): """Select data for all provided subjects, blocks and repetitions. - Each subject has 5 blocks of 12 repetitions. + Each subject has 12 blocks of 5 repetitions. 
The returned data is a dictionary with the folowing structure:: diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py index 5d84dca57..daa66b40d 100644 --- a/moabb/tests/datasets.py +++ b/moabb/tests/datasets.py @@ -76,6 +76,17 @@ def __init__(self, *args, **kwargs): def test_canary(self): assert VirtualReality() is not None + def test_warning_if_parameters_false(self): + with self.assertWarns(UserWarning): + VirtualReality(virtual_reality=False, screen_display=False) + + def test_data_path(self): + ds = VirtualReality(virtual_reality=True, screen_display=True) + data_path = ds.data_path(1) + assert len(data_path) == 2 + assert "subject_01_VR.mat" in data_path[0] + assert "subject_01_PC.mat" in data_path[1] + def test_get_block_repetition(self): ds = FakeVirtualRealityDataset() subject = 5 From 2821954cf91360c042fb625cdedf4283c0b938e9 Mon Sep 17 00:00:00 2001 From: gcattan Date: Thu, 22 Jun 2023 12:22:36 +0200 Subject: [PATCH 10/64] Resting state with dataset and example (#400) * In some places, the virtual reality dataset code was wrong. * fix: PC data not downloading. fix: inversion 12 blocks of 5 repetitions * push example from Pedro * fix error with datframe initialization * [pre-commit.ci] auto fixes from pre-commit.com hooks * add whats new * add test * [pre-commit.ci] auto fixes from pre-commit.com hooks * fix pytest/unittest * [pre-commit.ci] auto fixes from pre-commit.com hooks * replace logging by warnings library * move docstring to the top * [pre-commit.ci] auto fixes from pre-commit.com hooks * test completed * [pre-commit.ci] auto fixes from pre-commit.com hooks * leftover * typo >< * Update examples/vr_pc_p300_different_epoch_size.py Co-authored-by: Sylvain Chevallier * rename into plot_vr_pc_p300_different_epoch_size.py * - Add figure plot - add comments * [pre-commit.ci] auto fixes from pre-commit.com hooks * Update plot_vr_pc_p300_different_epoch_size.py * [pre-commit.ci] auto fixes from pre-commit.com hooks * Create resting_state.py * push resting state * add dataset * push example * couple of bug fixes * add a condition to p300 to ignore Target/NonTarget check Fix loading of the mat file * working example * improve doc * [pre-commit.ci] auto fixes from pre-commit.com hooks * Update whats_new.rst * Update phmd_ml.py * Update plot_phmd_ml_spectrum.py flake8 * complete documentation * improve lisibility * push test * [pre-commit.ci] auto fixes from pre-commit.com hooks * fix tests * event_list missing in initialization. Correct typo. 
* [pre-commit.ci] auto fixes from pre-commit.com hooks * fix typo * [pre-commit.ci] auto fixes from pre-commit.com hooks * Applying and improving small details inside the tutorial * [pre-commit.ci] auto fixes from pre-commit.com hooks --------- Co-authored-by: Gregoire Cattan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Bru Co-authored-by: Sylvain Chevallier --- docs/source/dataset_summary.rst | 13 ++++ docs/source/datasets.rst | 11 +++ docs/source/whats_new.rst | 1 + examples/plot_phmd_ml_spectrum.py | 74 ++++++++++++++++++ moabb/datasets/__init__.py | 1 + moabb/datasets/phmd_ml.py | 124 ++++++++++++++++++++++++++++++ moabb/paradigms/__init__.py | 1 + moabb/paradigms/p300.py | 22 ++++-- moabb/paradigms/resting_state.py | 81 +++++++++++++++++++ moabb/tests/paradigms.py | 48 ++++++++++++ 10 files changed, 371 insertions(+), 5 deletions(-) create mode 100644 examples/plot_phmd_ml_spectrum.py create mode 100644 moabb/datasets/phmd_ml.py create mode 100644 moabb/paradigms/resting_state.py diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst index 0d12df98d..2f02e1276 100644 --- a/docs/source/dataset_summary.rst +++ b/docs/source/dataset_summary.rst @@ -78,6 +78,19 @@ SSVEP Wang2016,34,62,40,6,5s,250Hz,1 +Resting States +====================== + +Include neuro experiments where the participant is not actively doing something. +For example, recoding the EEG of a subject while s/he is having the eye closed or opened +is a resting state experiment. + +.. csv-table:: + :header: Dataset, #Subj, #Chan, #Classes, #Blocks / class, Trials length, Sampling rate, #Sessions + :class: sortable + + HeadMountedDisplay,12,16,2,10,60s,512Hz,1 + Submit a new dataset ~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index c47f7cd97..345496196 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -75,6 +75,17 @@ SSVEP Datasets Lee2019_SSVEP +---------------------- +Resting State Datasets +---------------------- + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + HeadMountedDisplay + + ------------ Base & Utils ------------ diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index a86b42834..762dcdaa7 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -25,6 +25,7 @@ Enhancements - Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) - Add example with VirtualReality BrainInvaders dataset (:gh:`393` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) +- Add resting state paradigm with dataset and example (:gh:`400` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) Bugs ~~~~ diff --git a/examples/plot_phmd_ml_spectrum.py b/examples/plot_phmd_ml_spectrum.py new file mode 100644 index 000000000..722f7c91a --- /dev/null +++ b/examples/plot_phmd_ml_spectrum.py @@ -0,0 +1,74 @@ +""" +================================ +Spectral analysis of the trials +================================ + +This example demonstrates how to perform spectral +analysis on epochs extracted from a specific subject +within the :class:`moabb.datasets.HeadMountedDisplay` dataset. 
+ +""" + +# Authors: Pedro Rodrigues +# Modified by: Gregoire Cattan +# License: BSD (3-clause) + +import warnings + +import matplotlib.pyplot as plt +import numpy as np +from scipy.signal import welch + +from moabb.datasets import HeadMountedDisplay +from moabb.paradigms import RestingStateToP300Adapter + + +warnings.filterwarnings("ignore") + +############################################################################### +# Initialization +# --------------- +# +# 1) Specify the channel and subject to compute the power spectrum. +# 2) Create an instance of the :class:`moabb.datasets.HeadMountedDisplay` dataset. +# 3) Create an instance of the :class:`moabb.paradigms.RestingStateToP300Adapter` paradigm. +# By default, the data is filtered between 1-35 Hz, +# and epochs are extracted from 10 to 50 seconds after event tagging. + +# Select channel and subject for the remaining of the example. +channel = "Cz" +subject = 1 + +dataset = HeadMountedDisplay() +events = ["on", "off"] +paradigm = RestingStateToP300Adapter(events=events, channels=[channel]) + + +############################################################################### +# Estimate Power Spectral Density +# --------------- +# 1) Obtain the epochs for the specified subject. +# 2) Use Welch's method to estimate the power spectral density. + +X, y, _ = paradigm.get_data(dataset, [subject]) +f, S = welch(X, axis=-1, nperseg=1024, fs=paradigm.resample) + +############################################################################### +# Display of the data +# --------------- +# +# Plot the averaged Power Spectral Density (PSD) for each label condition, +# using the selected channel specified at the beginning of the script. + +fig, ax = plt.subplots(facecolor="white", figsize=(8.2, 5.1)) +for condition in events: + mean_power = np.mean(S[y == condition], axis=0).flatten() + ax.plot(f, 10 * np.log10(mean_power), label=condition) + +ax.set_xlim(paradigm.fmin, paradigm.fmax) +ax.set_ylim(100, 135) +ax.set_ylabel("Spectrum Magnitude (dB)", fontsize=14) +ax.set_xlabel("Frequency (Hz)", fontsize=14) +ax.set_title("PSD for Channel " + channel, fontsize=16) +ax.legend() +fig.show() diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index 62ebc2ee4..d1ad513aa 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -35,6 +35,7 @@ from .Lee2019 import Lee2019_ERP, Lee2019_MI, Lee2019_SSVEP from .mpi_mi import MunichMI from .neiry import DemonsP300 +from .phmd_ml import HeadMountedDisplay from .physionet_mi import PhysionetMI from .schirrmeister2017 import Schirrmeister2017 from .sosulski2019 import Sosulski2019 diff --git a/moabb/datasets/phmd_ml.py b/moabb/datasets/phmd_ml.py new file mode 100644 index 000000000..b7dc6a4f2 --- /dev/null +++ b/moabb/datasets/phmd_ml.py @@ -0,0 +1,124 @@ +import os + +import mne +import numpy as np +from scipy.io import loadmat + +from . import download as dl +from .base import BaseDataset + + +HEADMOUNTED_URL = "https://zenodo.org/record/2617085/files/" + + +class HeadMountedDisplay(BaseDataset): + """ + Passive Head Mounted Display with Music Listening dataset. + + .. 
admonition:: Dataset summary
+
+
+        ================== ======= ======= ========== ================= ============ =============== ===========
+        Name               #Subj   #Chan   #Classes   #Blocks/class     Trials len   Sampling rate   #Sessions
+        ================== ======= ======= ========== ================= ============ =============== ===========
+        HeadMountedDisplay 12      16      2          10                60s          512Hz           1
+        ================== ======= ======= ========== ================= ============ =============== ===========
+
+    We describe the experimental procedures for a dataset that we have made publicly available
+    at https://doi.org/10.5281/zenodo.2617084 in mat (Mathworks, Natick, USA) and csv formats.
+    This dataset contains electroencephalographic recordings of 12 subjects listening to music
+    with and without a passive head-mounted display, that is, a head-mounted display which does
+    not include any electronics with the exception of a smartphone. The electroencephalographic
+    headset consisted of 16 electrodes. Data were recorded during a pilot experiment taking
+    place in the GIPSA-lab, Grenoble, France, in 2017 (Cattan et al., 2018).
+    The ID of this dataset is PHMDML.EEG.2017-GIPSA.
+
+    **Full description of the experiment**
+    https://hal.archives-ouvertes.fr/hal-02085118
+
+    **Link to the data**
+    https://doi.org/10.5281/zenodo.2617084
+
+    **Authors**
+    Principal Investigator: Eng. Grégoire Cattan
+    Technical Supervisors: Eng. Pedro L. C. Rodrigues
+    Scientific Supervisor: Dr. Marco Congedo
+
+    **ID of the dataset**
+    PHMDML.EEG.2017-GIPSA
+
+    Notes
+    -----
+
+    .. versionadded:: 0.6.0
+
+    References
+    ----------
+
+    .. [1] G. Cattan, P. L. Coelho Rodrigues, and M. Congedo,
+    ‘Passive Head-Mounted Display Music-Listening EEG dataset’,
+    Gipsa-Lab ; IHMTEK, Research Report 2, Mar. 2019. doi: 10.5281/zenodo.2617084.
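+
+    Examples
+    --------
+    A minimal sketch of how the dataset is meant to be accessed (the raw files
+    are downloaded on first use)::
+
+        >>> from moabb.datasets import HeadMountedDisplay
+        >>> dataset = HeadMountedDisplay()
+        >>> sessions = dataset.get_data(subjects=[1])
+        >>> raw = sessions[1]["session_0"]["run_0"]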
+ + + """ + + def __init__(self): + super().__init__( + subjects=list(range(1, 12 + 1)), + sessions_per_subject=1, + events=dict(on=1, off=2), + code="PHMD-ML", + interval=[0, 1], + paradigm="rstate", + doi="https://doi.org/10.5281/zenodo.2617084 ", + ) + self._chnames = [ + "Fp1", + "Fp2", + "Fc5", + "Fz", + "Fc6", + "T7", + "Cz", + "T8", + "P7", + "P3", + "Pz", + "P4", + "P8", + "O1", + "Oz", + "O2", + "stim", + ] + self._chtypes = ["eeg"] * 16 + ["stim"] + + def _get_single_subject_data(self, subject): + """return data for a single subject""" + + filepath = self.data_path(subject)[0] + data = loadmat(os.path.join(filepath, os.listdir(filepath)[0])) + + first_channel = 1 + last_channel = 17 + S = data["data"][:, first_channel:last_channel] + stim = data["data"][:, -1] + + X = np.concatenate([S, stim[:, None]], axis=1).T + + info = mne.create_info( + ch_names=self._chnames, sfreq=512, ch_types=self._chtypes, verbose=False + ) + raw = mne.io.RawArray(data=X, info=info, verbose=False) + return {"session_0": {"run_0": raw}} + + def data_path( + self, subject, path=None, force_update=False, update_path=None, verbose=None + ): + if subject not in self.subject_list: + raise (ValueError("Invalid subject number")) + + url = "{:s}subject_{:02d}.mat".format(HEADMOUNTED_URL, subject) + file_path = dl.data_path(url, "HEADMOUNTED") + + return [file_path] diff --git a/moabb/paradigms/__init__.py b/moabb/paradigms/__init__.py index 055e278a3..a1dac30d8 100644 --- a/moabb/paradigms/__init__.py +++ b/moabb/paradigms/__init__.py @@ -9,4 +9,5 @@ # flake8: noqa from moabb.paradigms.p300 import * +from moabb.paradigms.resting_state import * from moabb.paradigms.ssvep import * diff --git a/moabb/paradigms/p300.py b/moabb/paradigms/p300.py index b3b8def71..09650ad61 100644 --- a/moabb/paradigms/p300.py +++ b/moabb/paradigms/p300.py @@ -168,11 +168,15 @@ def process_raw( # noqa: C901 # pick events, based on event_id try: - if type(event_id["Target"]) is list and type(event_id["NonTarget"]) == list: - event_id_new = dict(Target=1, NonTarget=0) - events = mne.merge_events(events, event_id["Target"], 1) - events = mne.merge_events(events, event_id["NonTarget"], 0) - event_id = event_id_new + if "Target" in event_id and "NonTarget" in event_id: + if ( + type(event_id["Target"]) is list + and type(event_id["NonTarget"]) == list + ): + event_id_new = dict(Target=1, NonTarget=0) + events = mne.merge_events(events, event_id["Target"], 1) + events = mne.merge_events(events, event_id["NonTarget"], 0) + event_id = event_id_new events = mne.pick_events(events, include=list(event_id.values())) except RuntimeError: # skip raw if no event found @@ -317,6 +321,14 @@ def __init__(self, fmin=1, fmax=24, **kwargs): raise (ValueError("P300 does not take argument filters")) super().__init__(filters=[[fmin, fmax]], **kwargs) + @property + def fmax(self): + return self.filters[0][1] + + @property + def fmin(self): + return self.filters[0][0] + class P300(SinglePass): """P300 for Target/NonTarget classification diff --git a/moabb/paradigms/resting_state.py b/moabb/paradigms/resting_state.py new file mode 100644 index 000000000..f41ab4c67 --- /dev/null +++ b/moabb/paradigms/resting_state.py @@ -0,0 +1,81 @@ +"""Resting state Paradigms + +Regroups paradigms for experience where we record the EEG +and the participant is not doing an active task, such +as focusing, counting or speaking. + +Typically, a open/close eye experiment, where we +record the EEG of a subject while he is having the eye open or close +is a resting state experiment. 
+""" + +from moabb.paradigms.p300 import SinglePass + + +class RestingStateToP300Adapter(SinglePass): + """Adapter to the P300 paradigm for resting state experiments. + It implements a SinglePass processing as for P300, except that: + - the name of the event is free (it is not enforced to Target/NonTarget as for P300) + - the default values are different. In particular, the length of the epochs is larger. + + Parameters + ---------- + fmin: float (default 1) + cutoff frequency (Hz) for the high pass filter + + fmax: float (default 35) + cutoff frequency (Hz) for the low pass filter + + events: List of str | None (default None) + event to use for epoching. If None, default to all events defined in + the dataset. + + tmin: float (default 10s) + Start time (in second) of the epoch, relative to the dataset specific + task interval e.g. tmin = 1 would mean the epoch will start 1 second + after the beginning of the task as defined by the dataset. + + tmax: float | None, (default 50s) + End time (in second) of the epoch, relative to the beginning of the + dataset specific task interval. tmax = 5 would mean the epoch will end + 5 second after the beginning of the task as defined in the dataset. If + None, use the dataset value. + + resample: float | None (default 128) + If not None, resample the eeg data with the sampling rate provided. + + baseline: None | tuple of length 2 + The time interval to consider as “baseline†when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + + channels: list of str | None (default None) + list of channel to select. If None, use all EEG channels available in + the dataset. 
+ """ + + def __init__(self, fmin=1, fmax=35, tmin=10, tmax=50, resample=128, **kwargs): + super().__init__( + fmin=fmin, fmax=fmax, tmin=tmin, tmax=tmax, resample=resample, **kwargs + ) + + def used_events(self, dataset): + return {ev: dataset.event_id[ev] for ev in self.events} + + def is_valid(self, dataset): + ret = True + if not (dataset.paradigm == "rstate"): + ret = False + + if self.events: + if not set(self.events) <= set(dataset.event_id.keys()): + ret = False + + return ret + + @property + def scoring(self): + return "roc_auc" diff --git a/moabb/tests/paradigms.py b/moabb/tests/paradigms.py index 4917aa3c5..f69f4072f 100644 --- a/moabb/tests/paradigms.py +++ b/moabb/tests/paradigms.py @@ -16,6 +16,7 @@ FilterBankMotorImagery, FilterBankSSVEP, LeftRightImagery, + RestingStateToP300Adapter, ) @@ -329,6 +330,53 @@ def test_P300_paradigm(self): self.assertIsInstance(epochs, BaseEpochs) +class Test_RestingState(unittest.TestCase): + def test_RestingState_paradigm(self): + event_list = ["Open", "Close"] + paradigm = RestingStateToP300Adapter(events=event_list) + dataset = FakeDataset(paradigm="rstate", event_list=event_list) + X, labels, metadata = paradigm.get_data(dataset, subjects=[1]) + + # we should have all the same length + self.assertEqual(len(X), len(labels), len(metadata)) + # X must be a 3D Array + self.assertEqual(len(X.shape), 3) + # labels must contain 2 values (Open/Close) + self.assertEqual(len(np.unique(labels)), 2) + # metadata must have subjets, sessions, runs + self.assertTrue("subject" in metadata.columns) + self.assertTrue("session" in metadata.columns) + self.assertTrue("run" in metadata.columns) + # we should have only one subject in the metadata + self.assertEqual(np.unique(metadata.subject), 1) + # we should have two sessions in the metadata + self.assertEqual(len(np.unique(metadata.session)), 2) + # should return epochs + epochs, _, _ = paradigm.get_data(dataset, subjects=[1], return_epochs=True) + self.assertIsInstance(epochs, BaseEpochs) + # should return raws + raws, _, _ = paradigm.get_data(dataset, subjects=[1], return_raws=True) + for raw in raws: + self.assertIsInstance(raw, BaseRaw) + # should raise error + self.assertRaises( + ValueError, + paradigm.get_data, + dataset, + subjects=[1], + return_epochs=True, + return_raws=True, + ) + + def test_RestingState_default_values(self): + paradigm = RestingStateToP300Adapter() + assert paradigm.tmin == 10 + assert paradigm.tmax == 50 + assert paradigm.fmin == 1 + assert paradigm.fmax == 35 + assert paradigm.resample == 128 + + class Test_SSVEP(unittest.TestCase): def test_BaseSSVEP_paradigm(self): paradigm = BaseSSVEP(n_classes=None) From 2b38df295bc8c79d7c7742f18c92368c7cfdadcf Mon Sep 17 00:00:00 2001 From: Bru Date: Thu, 22 Jun 2023 22:02:27 +0200 Subject: [PATCH 11/64] Saving the model (#401) * Updating README.md * Adding new saving * Adding new saving model * Adding new functions * updating the models doc * updating the models and evaluations * Update moabb/evaluations/evaluations.py * Generatic type * adding if * Adding saving the best and changing the saving * Solving Parallel and Saving Model * [pre-commit.ci] auto fixes from pre-commit.com hooks * Adding if to hdf5_path is None * Solving the new path * Adding new ifs * Returning the Results folder * Solve Saved model on Pytorch * Removing Keras models saving * Updating model_check * Updated Saved model in Pytorch, second methodology if is a Skorch model * Added Saved Model on Keras and Pytorch * Example Load model * Updating the save model, 
optimizing the code * Fixing saving function * renaming model to step * Updating the tutorial * Updating the path * Adding new test and fix __init__.py * Adding new tests * Updating whats new file --------- Co-authored-by: CARRARA Igor Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/source/whats_new.rst | 3 + .../plot_grid_search_withinsession.py | 4 +- examples/plot_load_model.py | 123 ++++++++++ moabb/benchmark.py | 8 +- moabb/evaluations/__init__.py | 1 + moabb/evaluations/evaluations.py | 145 ++++++++--- moabb/evaluations/utils.py | 215 +++++++++++++++++ moabb/tests/evaluations.py | 225 +++++++++++++++++- 8 files changed, 682 insertions(+), 42 deletions(-) create mode 100644 examples/plot_load_model.py create mode 100644 moabb/evaluations/utils.py diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 762dcdaa7..ce0a56e94 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -25,6 +25,8 @@ Enhancements - Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) - Add example with VirtualReality BrainInvaders dataset (:gh:`393` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) +- Adding saving option for the models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) +- Adding example to load different type of models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) - Add resting state paradigm with dataset and example (:gh:`400` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) Bugs @@ -33,6 +35,7 @@ Bugs - Restore 3 subject from Cho2017 (:gh:`392` by `Igor Carrara`_ and `Sylvain Chevallier`_) - Correct downloading with VirtualReality BrainInvaders dataset (:gh:`393` by `Gregoire Cattan`_) - Rename event `substraction` to `subtraction` in :func:`moabb.dataset.Shin2017B` (:gh:`397` by `Pierre Guetschel`_) +- Fixing issue with parallel evaluation (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) API changes ~~~~~~~~~~~ diff --git a/examples/advanced_examples/plot_grid_search_withinsession.py b/examples/advanced_examples/plot_grid_search_withinsession.py index afa8f24a1..85a15dd44 100644 --- a/examples/advanced_examples/plot_grid_search_withinsession.py +++ b/examples/advanced_examples/plot_grid_search_withinsession.py @@ -145,7 +145,7 @@ path, "GridSearch_WithinSession", "001-2014", - "subject1", + "1", "session_E", "GridSearchEN", "Grid_Search_WithinSession.pkl", @@ -165,7 +165,7 @@ path, "GridSearch_WithinSession", "001-2014", - "subject1", + "1", "session_T", "GridSearchEN", "Grid_Search_WithinSession.pkl", diff --git a/examples/plot_load_model.py b/examples/plot_load_model.py new file mode 100644 index 000000000..1a7b1c32c --- /dev/null +++ b/examples/plot_load_model.py @@ -0,0 +1,123 @@ +""" +============================================== +Load Model (Scikit, Pytorch, Keras) with MOABB +============================================== + +This example shows how to use load the pretrained pipeline in MOABB. 
+""" +# Authors: Igor Carrara +# +# License: BSD (3-clause) + +from pickle import load + +import keras +import torch +from braindecode import EEGClassifier +from braindecode.models import EEGInception +from scikeras.wrappers import KerasClassifier +from sklearn.pipeline import Pipeline +from skorch.callbacks import EarlyStopping, EpochScoring +from skorch.dataset import ValidSplit + +from moabb import set_log_level +from moabb.pipelines.features import StandardScaler_Epoch +from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG +from moabb.utils import setup_seed + + +set_log_level("info") + +############################################################################### +# In this example, we will use the results computed by the following examples +# +# - plot_benchmark_ +# - plot_benchmark_braindecode_ +# - plot_benchmark_DL_ +# --------------------- + +# Set up reproducibility of Tensorflow and PyTorch +setup_seed(42) + +############################################################################### +# Loading the Scikit-learn pipelines + +with open( + "./results/Models_WithinSession/Zhou 2016/1/session_0/CSP + SVM/fitted_model_best.pkl", + "rb", +) as pickle_file: + CSP_SVM_Trained = load(pickle_file) + +############################################################################### +# Loading the Keras model +# We load the single Keras model, if we want we can set in the exact same pipeline. + +model_Keras = keras.models.load_model( + "./results/Models_WithinSession/001-2014/1/session_E/Keras_DeepConvNet/kerasdeepconvnet_fitted_model_best.h5" +) +# Now we need to instantiate a new SciKeras object since we only saved the Keras model +Keras_DeepConvNet_Trained = KerasClassifier(model_Keras) +# Create the pipelines + + +pipes_keras = Pipeline( + [ + ("StandardScaler_Epoch", StandardScaler_Epoch), + ("Keras_DeepConvNet_Trained", Keras_DeepConvNet_Trained), + ] +) + + +############################################################################### +# Loading the PyTorch model + +# Set EEG Inception model +model = EEGInception(in_channels=22, n_classes=2) + +# Hyperparameter +LEARNING_RATE = 0.0001 +WEIGHT_DECAY = 0 +BATCH_SIZE = 64 +SEED = 42 +VERBOSE = 1 +EPOCH = 2 +PATIENCE = 3 + +# Define a Skorch classifier +clf = EEGClassifier( + module=model, + criterion=torch.nn.CrossEntropyLoss, + optimizer=torch.optim.Adam, + optimizer__lr=LEARNING_RATE, + batch_size=BATCH_SIZE, + max_epochs=EPOCH, + train_split=ValidSplit(0.2, random_state=SEED), + callbacks=[ + EarlyStopping(monitor="valid_loss", patience=PATIENCE), + EpochScoring( + scoring="accuracy", on_train=True, name="train_acc", lower_is_better=False + ), + EpochScoring( + scoring="accuracy", on_train=False, name="valid_acc", lower_is_better=False + ), + InputShapeSetterEEG( + params_list=["in_channels", "input_window_samples", "n_classes"], + ), + ], + verbose=VERBOSE, # Not printing the results for each epoch +) + +clf.initialize() + +f_params = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_model.pkl" +f_optimizer = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_optim.pkl" +f_history = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_history.json" + +clf.load_params(f_params=f_params, f_optimizer=f_optimizer, f_history=f_history) + + +# Create the dataset +create_dataset = BraindecodeDatasetLoader(drop_last_window=False) + +# Create the pipelines +pipes_pytorch = 
Pipeline([("Braindecode_dataset", create_dataset), ("EEGInception", clf)]) diff --git a/moabb/benchmark.py b/moabb/benchmark.py index b90430d2f..514e09ea7 100644 --- a/moabb/benchmark.py +++ b/moabb/benchmark.py @@ -40,6 +40,7 @@ def benchmark( # noqa: C901 overwrite=False, output="./benchmark/", n_jobs=-1, + n_jobs_evaluation=1, plot=False, contexts=None, include_datasets=None, @@ -85,6 +86,9 @@ def benchmark( # noqa: C901 Folder to store the analysis results n_jobs: int Number of threads to use for running parallel jobs + n_jobs_evaluation: int, default=1 + Number of jobs for evaluation, processing in parallel the within session, + cross-session or cross-subject. plot: bool Plot results after computing contexts: str @@ -172,7 +176,8 @@ def benchmark( # noqa: C901 datasets=d, random_state=42, hdf5_path=results, - n_jobs=1, + n_jobs=n_jobs, + n_jobs_evaluation=n_jobs_evaluation, overwrite=overwrite, return_epochs=True, ) @@ -192,6 +197,7 @@ def benchmark( # noqa: C901 random_state=42, hdf5_path=results, n_jobs=n_jobs, + n_jobs_evaluation=n_jobs_evaluation, overwrite=overwrite, ) paradigm_results = context.process( diff --git a/moabb/evaluations/__init__.py b/moabb/evaluations/__init__.py index a48a6fd82..ec86b8e29 100644 --- a/moabb/evaluations/__init__.py +++ b/moabb/evaluations/__init__.py @@ -10,3 +10,4 @@ CrossSubjectEvaluation, WithinSessionEvaluation, ) +from .utils import create_save_path, save_model_cv, save_model_list diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index 4f7230fa7..2e042cb10 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -15,13 +15,15 @@ LeaveOneGroupOut, StratifiedKFold, StratifiedShuffleSplit, - cross_val_score, + cross_validate, ) from sklearn.model_selection._validation import _fit_and_score, _score from sklearn.preprocessing import LabelEncoder +from sklearn.utils import parallel_backend from tqdm import tqdm from moabb.evaluations.base import BaseEvaluation +from moabb.evaluations.utils import create_save_path, save_model_cv, save_model_list try: @@ -83,7 +85,7 @@ class WithinSessionEvaluation(BaseEvaluation): suffix: str Suffix for the results file. hdf5_path: str - Specific path for storing the results. + Specific path for storing the results and models. additional_columns: None Adding information to results. 
return_epochs: bool, default=False @@ -178,12 +180,13 @@ def _grid_search(self, param_grid, name_grid, name, grid_clf, X_, y_, cv): # flake8: noqa: C901 def _evaluate(self, dataset, pipelines, param_grid): - results = Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( - delayed(self._evaluate_subject)(dataset, pipelines, param_grid, subject) - for subject in tqdm( - dataset.subject_list, desc=f"{dataset.code}-WithinSession" + with parallel_backend("threading"): + results = Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( + delayed(self._evaluate_subject)(dataset, pipelines, param_grid, subject) + for subject in tqdm( + dataset.subject_list, desc=f"{dataset.code}-WithinSession" + ) ) - ) # Concatenate the results from all subjects yield from [res for subject_results in results for res in subject_results] @@ -220,32 +223,51 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): grid_clf = clone(clf) - name_grid = os.path.join( - str(self.hdf5_path), - "GridSearch_WithinSession", + # Create folder for grid search results + name_grid = create_save_path( + self.hdf5_path, dataset.code, - "subject" + str(subject), - str(session), - str(name), + subject, + session, + name, + grid=True, + eval_type="WithinSession", ) # Implement Grid Search grid_clf = self._grid_search( param_grid, name_grid, name, grid_clf, X_, y_, cv ) + if self.hdf5_path is not None: + model_save_path = create_save_path( + self.hdf5_path, + dataset.code, + subject, + session, + name, + grid=False, + eval_type="WithinSession", + ) if isinstance(X, BaseEpochs): scorer = get_scorer(self.paradigm.scoring) acc = list() X_ = X[ix] y_ = y[ix] if self.mne_labels else y_cv - for train, test in cv.split(X_, y_): + for cv_ind, (train, test) in enumerate(cv.split(X_, y_)): cvclf = clone(grid_clf) cvclf.fit(X_[train], y_[train]) acc.append(scorer(cvclf, X_[test], y_[test])) + + if self.hdf5_path is not None: + save_model_cv( + model=cvclf, save_path=model_save_path, cv_index=cv_ind + ) + acc = np.array(acc) + score = acc.mean() else: - acc = cross_val_score( + results = cross_validate( grid_clf, X[ix], y_cv, @@ -253,13 +275,22 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): scoring=self.paradigm.scoring, n_jobs=self.n_jobs, error_score=self.error_score, + return_estimator=True, ) - score = acc.mean() + score = results["test_score"].mean() if _carbonfootprint: emissions = tracker.stop() if emissions is None: emissions = np.NaN duration = time() - t_start + + if self.hdf5_path is not None: + save_model_list( + results["estimator"], + score_list=results["test_score"], + save_path=model_save_path, + ) + nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { "time": duration / 5.0, # 5 fold CV @@ -448,7 +479,7 @@ class CrossSessionEvaluation(BaseEvaluation): suffix: str Suffix for the results file. hdf5_path: str - Specific path for storing the results. + Specific path for storing the results and models. additional_columns: None Adding information to results. 
return_epochs: bool, default=False @@ -500,11 +531,12 @@ def evaluate(self, dataset, pipelines, param_grid): raise AssertionError("Dataset is not appropriate for evaluation") # Progressbar at subject level results = [] - for result in Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( - delayed(self.process_subject)(subject, param_grid, pipelines, dataset) - for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-CrossSession") - ): - results.extend(result) + with parallel_backend("threading"): + for result in Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( + delayed(self.process_subject)(subject, param_grid, pipelines, dataset) + for subject in dataset.subject_list + ): + results.extend(result) return results @@ -541,12 +573,14 @@ def process_subject(self, subject, param_grid, pipelines, dataset): grid_clf = clone(clf) # Load result if the folder exist - name_grid = os.path.join( - str(self.hdf5_path), - "GridSearch_CrossSession", - dataset.code, - str(subject), - name, + name_grid = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=True, + eval_type="CrossSession", ) # Implement Grid Search @@ -559,14 +593,32 @@ def process_subject(self, subject, param_grid, pipelines, dataset): if emissions_grid is None: emissions_grid = 0 - for train, test in cv.split(X, y, groups): + if self.hdf5_path is not None: + model_save_path = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=False, + eval_type="CrossSession", + ) + + for cv_ind, (train, test) in enumerate(cv.split(X, y, groups)): + model_list = [] if _carbonfootprint: tracker.start() t_start = time() if isinstance(X, BaseEpochs): cvclf = clone(grid_clf) cvclf.fit(X[train], y[train]) + model_list.append(cvclf) score = scorer(cvclf, X[test], y[test]) + + if self.hdf5_path is not None: + save_model_cv( + model=cvclf, save_path=model_save_path, cv_index=str(cv_ind) + ) else: result = _fit_and_score( clone(grid_clf), @@ -579,14 +631,21 @@ def process_subject(self, subject, param_grid, pipelines, dataset): parameters=None, fit_params=None, error_score=self.error_score, + return_estimator=True, ) score = result["test_scores"] + model_list = result["estimator"] if _carbonfootprint: emissions = tracker.stop() if emissions is None: emissions = 0 duration = time() - t_start + if self.hdf5_path is not None: + save_model_list( + model_list=model_list, score_list=score, save_path=model_save_path + ) + nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { "time": duration, @@ -636,7 +695,7 @@ class CrossSubjectEvaluation(BaseEvaluation): suffix: str Suffix for the results file. hdf5_path: str - Specific path for storing the results. + Specific path for storing the results and models. additional_columns: None Adding information to results. 
return_epochs: bool, default=False @@ -724,6 +783,7 @@ def evaluate(self, dataset, pipelines, param_grid): for name, clf in pipelines.items(): if _carbonfootprint: tracker.start() + name_grid = os.path.join( str(self.hdf5_path), "GridSearch_CrossSubject", dataset.code, name ) @@ -738,15 +798,16 @@ def evaluate(self, dataset, pipelines, param_grid): emissions_grid[name] = 0 # Progressbar at subject level - for train, test in tqdm( - cv.split(X, y, groups), - total=n_subjects, - desc=f"{dataset.code}-CrossSubject", + for cv_ind, (train, test) in enumerate( + tqdm( + cv.split(X, y, groups), + total=n_subjects, + desc=f"{dataset.code}-CrossSubject", + ) ): subject = groups[test[0]] # now we can check if this subject has results run_pipes = self.results.not_yet_computed(pipelines, dataset, subject) - # iterate over pipelines for name, clf in run_pipes.items(): if _carbonfootprint: @@ -759,6 +820,20 @@ def evaluate(self, dataset, pipelines, param_grid): emissions = 0 duration = time() - t_start + if self.hdf5_path is not None: + model_save_path = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=False, + eval_type="CrossSubject", + ) + + save_model_cv( + model=model, save_path=model_save_path, cv_index=str(cv_ind) + ) # we eval on each session for session in np.unique(sessions[test]): ix = sessions[test] == session diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py new file mode 100644 index 000000000..e8241e8c2 --- /dev/null +++ b/moabb/evaluations/utils.py @@ -0,0 +1,215 @@ +from __future__ import annotations + +from pathlib import Path +from pickle import HIGHEST_PROTOCOL, dump +from typing import Sequence + +from numpy import argmax +from sklearn.pipeline import Pipeline + + +def _check_if_is_keras_model(model): + """ + Check if the model is a Keras model + Parameters + ---------- + model: object + Model to check + Returns + ------- + is_keras_model: bool + True if the model is a Keras model + """ + try: + from scikeras.wrappers import KerasClassifier + + is_keras_model = isinstance(model, KerasClassifier) + return is_keras_model + except ImportError: + return False + + +def _check_if_is_pytorch_model(model): + """ + Check if the model is a Keras model + Parameters + ---------- + model: object + Model to check + Returns + ------- + is_keras_model: bool + True if the model is a Keras model + """ + try: + from skorch import NeuralNetClassifier + + is_pytorch_model = isinstance(model, NeuralNetClassifier) + return is_pytorch_model + except ImportError: + return False + + +def _check_if_is_pytorch_steps(model): + skorch_valid = False + try: + skorch_valid = any( + _check_if_is_pytorch_model(j) for j in model.named_steps.values() + ) + return skorch_valid + except Exception: + return skorch_valid + + +def _check_if_is_keras_steps(model): + keras_valid = False + try: + keras_valid = any(_check_if_is_keras_model(j) for j in model.named_steps.values()) + return keras_valid + except Exception: + return keras_valid + + +def save_model_cv(model: object, save_path: str | Path, cv_index: str | int): + """ + Save a model fitted to a folder + Parameters + ---------- + model: object + Model (pipeline) fitted + save_path: str + Path to save the model, will create if it does not exist + based on the parameter hdf5_path from the evaluation object. 
+ cv_index: str + Index of the cross-validation fold used to fit the model + or 'best' if the model is the best fitted + + Returns + ------- + + """ + if save_path is None: + raise IOError("No path to save the model") + else: + Path(save_path).mkdir(parents=True, exist_ok=True) + + if _check_if_is_pytorch_steps(model): + for step_name in model.named_steps: + step = model.named_steps[step_name] + file_step = f"{step_name}_fitted_{cv_index}" + + if _check_if_is_pytorch_model(step): + step.save_params( + f_params=Path(save_path) / f"{file_step}_model.pkl", + f_optimizer=Path(save_path) / f"{file_step}_optim.pkl", + f_history=Path(save_path) / f"{file_step}_history.json", + f_criterion=Path(save_path) / f"{file_step}_criterion.pkl", + ) + else: + with open((Path(save_path) / f"{file_step}.pkl"), "wb") as file: + dump(step, file, protocol=HIGHEST_PROTOCOL) + + elif _check_if_is_keras_steps(model): + for step_name in model.named_steps: + file_step = f"{step_name}_fitted_model_{cv_index}" + step = model.named_steps[step_name] + if _check_if_is_keras_model(step): + step.model_.save(Path(save_path) / f"{file_step}.h5") + else: + with open((Path(save_path) / f"{file_step}.pkl"), "wb") as file: + dump(step, file, protocol=HIGHEST_PROTOCOL) + else: + with open((Path(save_path) / f"fitted_model_{cv_index}.pkl"), "wb") as file: + dump(model, file, protocol=HIGHEST_PROTOCOL) + + +def save_model_list(model_list: list | Pipeline, score_list: Sequence, save_path: str): + """ + Save a list of models fitted to a folder + Parameters + ---------- + model_list: list | Pipeline + List of models or model (pipelines) fitted + score_list: Sequence + List of scores for each model in model_list + save_path: str + Path to save the models, will create if it does not exist + based on the parameter hdf5_path from the evaluation object. + Returns + ------- + """ + if model_list is None: + return + + Path(save_path).mkdir(parents=True, exist_ok=True) + + if not isinstance(model_list, list): + model_list = [model_list] + + for cv_index, model in enumerate(model_list): + save_model_cv(model, save_path, str(cv_index)) + + best_model = model_list[argmax(score_list)] + + save_model_cv(best_model, save_path, "best") + + +def create_save_path( + hdf5_path, + code: str, + subject: int | str, + session: str, + name: str, + grid=False, + eval_type="WithinSession", +): + """ + Create a save path based on evaluation parameters. + Parameters + ---------- + hdf5_path : str + The base path where the models will be saved. + code : str + The code for the evaluation. + subject : int + The subject ID for the evaluation. + session : str + The session ID for the evaluation. + name : str + The name for the evaluation. + grid : bool, optional + Whether the evaluation is a grid search or not. Defaults to False. + eval_type : str, optional + The type of evaluation, either 'WithinSession', 'CrossSession' or 'CrossSubject'. + Defaults to WithinSession. + Returns + ------- + path_save: str + The created save path. 
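+
+    Examples
+    --------
+    Illustrative sketch only (POSIX-style separators shown; the exact string
+    is platform-dependent)::
+
+        >>> create_save_path("results", "001-2014", 1, "session_E", "CSP + SVM")
+        'results/Models_WithinSession/001-2014/1/session_E/CSP + SVM'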
+ """ + if hdf5_path is not None: + if eval_type != "WithinSession": + session = "" + + if grid: + path_save = ( + Path(hdf5_path) + / f"GridSearch_{eval_type}" + / code + / f"{str(subject)}" + / str(session) + / str(name) + ) + else: + path_save = ( + Path(hdf5_path) + / f"Models_{eval_type}" + / code + / f"{str(subject)}" + / str(session) + / str(name) + ) + + return str(path_save) + else: + print("No hdf5_path provided, models will not be saved.") diff --git a/moabb/tests/evaluations.py b/moabb/tests/evaluations.py index d08e53b09..22411e9ad 100644 --- a/moabb/tests/evaluations.py +++ b/moabb/tests/evaluations.py @@ -11,12 +11,14 @@ from pyriemann.estimation import Covariances from pyriemann.spatialfilters import CSP from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA +from sklearn.dummy import DummyClassifier as Dummy from sklearn.model_selection import GridSearchCV -from sklearn.pipeline import make_pipeline +from sklearn.pipeline import Pipeline, make_pipeline from moabb.analysis.results import get_string_rep from moabb.datasets.fake import FakeDataset from moabb.evaluations import evaluations as ev +from moabb.evaluations.utils import create_save_path, save_model_cv, save_model_list from moabb.paradigms.motor_imagery import FakeImageryParadigm @@ -27,7 +29,6 @@ except ImportError: _carbonfootprint = False - pipelines = OrderedDict() pipelines["C"] = make_pipeline(Covariances("oas"), CSP(8), LDA()) dataset = FakeDataset(["left_hand", "right_hand"], n_subjects=2) @@ -81,7 +82,7 @@ def test_eval_grid_search(self): "res_test", "GridSearch_WithinSession", str(dataset.code), - "subject1", + "1", "session_0", "C", "Grid_Search_WithinSession.pkl", @@ -237,7 +238,7 @@ def tearDown(self): os.remove(path) def test_fails_if_nothing_returned(self): - self.assertRaises(ValueError, self.eval.process, pipelines) + self.assertRaises(Exception, self.eval.process, pipelines) # TODO Add custom evaluation that actually returns additional info @@ -276,5 +277,221 @@ def test_compatible_dataset(self): self.assertTrue(self.eval.is_valid(dataset=ds)) +class UtilEvaluation(unittest.TestCase): + def test_save_model_cv(self): + model = Dummy() + save_path = "test_save_path" + cv_index = 0 + + save_model_cv(model, save_path, cv_index) + + # Assert that the saved model file exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))) + + def test_save_model_list(self): + step = Dummy() + model = Pipeline([("step", step)]) + model_list = [model] + score_list = [0.8] + save_path = "test_save_path" + save_model_list(model_list, score_list, save_path) + + # Assert that the saved model file for best model exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))) + + def test_create_save_path(self): + hdf5_path = "base_path" + code = "evaluation_code" + subject = 1 + session = "session_0" + name = "evaluation_name" + eval_type = "WithinSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + expected_path = os.path.join( + hdf5_path, "Models_WithinSession", code, "1", "session_0", "evaluation_name" + ) + self.assertEqual(save_path, expected_path) + + grid_save_path = create_save_path( + hdf5_path, code, subject, session, name, grid=True, eval_type=eval_type + ) + + expected_grid_path = os.path.join( + hdf5_path, + "GridSearch_WithinSession", + code, + "1", + "session_0", + "evaluation_name", + ) + self.assertEqual(grid_save_path, expected_grid_path) + + def 
test_save_model_cv_with_pytorch_model(self): + try: + import torch + from skorch import NeuralNetClassifier + except ImportError: + self.skipTest("skorch library not available") + + step = NeuralNetClassifier(module=torch.nn.Linear(10, 2)) + step.initialize() + model = Pipeline([("step", step)]) + save_path = "." + cv_index = 0 + save_model_cv(model, save_path, cv_index) + + # Assert that the saved model files exist + self.assertTrue( + os.path.isfile(os.path.join(save_path, "step_fitted_0_model.pkl")) + ) + self.assertTrue( + os.path.isfile(os.path.join(save_path, "step_fitted_0_optim.pkl")) + ) + self.assertTrue( + os.path.isfile(os.path.join(save_path, "step_fitted_0_history.json")) + ) + self.assertTrue( + os.path.isfile(os.path.join(save_path, "step_fitted_0_criterion.pkl")) + ) + + def test_save_model_list_with_multiple_models(self): + model1 = Dummy() + model2 = Dummy() + model_list = [model1, model2] + score_list = [0.8, 0.9] + save_path = "test_save_path" + save_model_list(model_list, score_list, save_path) + + # Assert that the saved model files for each model exist + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))) + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_1.pkl"))) + + # Assert that the saved model file for the best model exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))) + + def test_create_save_path_with_cross_session_evaluation(self): + hdf5_path = "base_path" + code = "evaluation_code" + subject = 1 + session = "session_0" + name = "evaluation_name" + eval_type = "CrossSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + expected_path = os.path.join( + hdf5_path, "Models_CrossSession", code, "1", "evaluation_name" + ) + self.assertEqual(save_path, expected_path) + + grid_save_path = create_save_path( + hdf5_path, code, subject, session, name, grid=True, eval_type=eval_type + ) + + expected_grid_path = os.path.join( + hdf5_path, "GridSearch_CrossSession", code, "1", "evaluation_name" + ) + self.assertEqual(grid_save_path, expected_grid_path) + + def test_create_save_path_without_hdf5_path(self): + hdf5_path = None + code = "evaluation_code" + subject = 1 + session = "session_0" + name = "evaluation_name" + eval_type = "WithinSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + self.assertIsNone(save_path) + + def test_save_model_cv_without_hdf5_path(self): + model = DummyClassifier(kernel="rbf") + save_path = None + cv_index = 0 + + # Assert that calling save_model_cv without a save_path does raise an IOError + with self.assertRaises(IOError): + save_model_cv(model, save_path, cv_index) + + def test_save_model_list_with_single_model(self): + model = Dummy() + model_list = model + score_list = [0.8] + save_path = "test_save_path" + save_model_list(model_list, score_list, save_path) + + # Assert that the saved model file for the single model exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))) + + # Assert that the saved model file for the best model exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))) + + def test_create_save_path_with_cross_subject_evaluation(self): + hdf5_path = "base_path" + code = "evaluation_code" + subject = "1" + session = "" + name = "evaluation_name" + eval_type = "CrossSubject" + save_path = create_save_path( + hdf5_path, code, subject, session, name, 
eval_type=eval_type + ) + + expected_path = os.path.join( + hdf5_path, "Models_CrossSubject", code, "1", "evaluation_name" + ) + self.assertEqual(save_path, expected_path) + + grid_save_path = create_save_path( + hdf5_path, code, subject, session, name, grid=True, eval_type=eval_type + ) + + expected_grid_path = os.path.join( + hdf5_path, "GridSearch_CrossSubject", code, "1", "evaluation_name" + ) + self.assertEqual(grid_save_path, expected_grid_path) + + def test_create_save_path_without_hdf5_path_or_session(self): + hdf5_path = None + code = "evaluation_code" + subject = 1 + session = "" + name = "evaluation_name" + eval_type = "WithinSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + self.assertIsNone(save_path) + + grid_save_path = create_save_path( + hdf5_path, code, subject, session, name, grid=True, eval_type=eval_type + ) + + self.assertIsNone(grid_save_path) + + def test_create_save_path_with_special_characters(self): + hdf5_path = "base_path" + code = "evaluation_code" + subject = 1 + session = "session_0" + name = "evalu@tion#name" + eval_type = "WithinSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + expected_path = os.path.join( + hdf5_path, "Models_WithinSession", code, "1", "session_0", "evalu@tion#name" + ) + self.assertEqual(save_path, expected_path) + + if __name__ == "__main__": unittest.main() From 189420a435dbf13bc5b21617ce4f17864ff8d0e0 Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Fri, 23 Jun 2023 12:04:43 +0200 Subject: [PATCH 12/64] Save PhysionetMI parameters (#403) * Update PhysionetMI Fixes #387 * Update whats_new.rst --------- Co-authored-by: Bru --- docs/source/whats_new.rst | 1 + moabb/datasets/physionet_mi.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index ce0a56e94..4a76327ea 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -35,6 +35,7 @@ Bugs - Restore 3 subject from Cho2017 (:gh:`392` by `Igor Carrara`_ and `Sylvain Chevallier`_) - Correct downloading with VirtualReality BrainInvaders dataset (:gh:`393` by `Gregoire Cattan`_) - Rename event `substraction` to `subtraction` in :func:`moabb.dataset.Shin2017B` (:gh:`397` by `Pierre Guetschel`_) +- Save parameters of :func:`moabb.dataset.PhysionetMI` (:gh:`403` by `Pierre Guetschel`_) - Fixing issue with parallel evaluation (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) API changes diff --git a/moabb/datasets/physionet_mi.py b/moabb/datasets/physionet_mi.py index bcb1ce9e2..6ae441287 100644 --- a/moabb/datasets/physionet_mi.py +++ b/moabb/datasets/physionet_mi.py @@ -91,6 +91,8 @@ def __init__(self, imagined=True, executed=False): doi="10.1109/TBME.2004.827072", ) + self.imagined = imagined + self.executed = executed self.feet_runs = [] self.hand_runs = [] From a6e726d9d1bb47b7ba137ad88b97d9b2c95eaa6e Mon Sep 17 00:00:00 2001 From: Bru Date: Mon, 26 Jun 2023 16:32:49 +0200 Subject: [PATCH 13/64] SSL error (#409) * Updating README.md * Removing verify given SSLError * Updating the whats_new.rst --- docs/source/whats_new.rst | 1 + moabb/datasets/download.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 4a76327ea..2a414e484 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -37,6 +37,7 @@ Bugs - Rename event `substraction` to `subtraction` 
in :func:`moabb.dataset.Shin2017B` (:gh:`397` by `Pierre Guetschel`_) - Save parameters of :func:`moabb.dataset.PhysionetMI` (:gh:`403` by `Pierre Guetschel`_) - Fixing issue with parallel evaluation (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) +- Fixing SSLError from BCI competition IV (:gh:`404` by `Bruno Aristimunha`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/download.py b/moabb/datasets/download.py index c2faedbc0..1ecdfadc1 100644 --- a/moabb/datasets/download.py +++ b/moabb/datasets/download.py @@ -1,5 +1,6 @@ # Author: Alexandre Barachant # Sylvain Chevallier +# Bruno Aristimunha # License: BSD Style. import json @@ -11,7 +12,7 @@ from mne import get_config, set_config from mne.datasets.utils import _get_path from mne.utils import _url_to_local_path, verbose -from pooch import file_hash, retrieve +from pooch import HTTPDownloader, file_hash, retrieve from requests.exceptions import HTTPError @@ -140,6 +141,8 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None): table = {ord(c): "-" for c in ':*?"<>|'} destination = Path(str(path) + destination.split(str(path))[1].translate(table)) + downloader = HTTPDownloader(verify=False) + # Fetch the file if not destination.is_file() or force_update: if destination.is_file(): @@ -155,6 +158,7 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None): fname=Path(url).name, path=str(destination.parent), progressbar=True, + downloader=downloader, ) return dlpath From d4f23b68e8d071ea4b6db86d595f95827208e503 Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Mon, 26 Jun 2023 19:03:59 +0200 Subject: [PATCH 14/64] Update CONTRIBUTING.md for deeplaerning dependancies (#407) * Update CONTRIBUTING.md for Dee-learning * [pre-commit.ci] auto fixes from pre-commit.com hooks --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Bru --- CONTRIBUTING.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4a4a00f41..85bf674cc 100755 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -80,6 +80,27 @@ disable `poetry` environment creation. Also in this case be careful with version in your environment - it has to satisfy requirements stated in `pyproject.toml`. In case you disable `poetry` you are in charge of this. +_Note 3 (deep learning):_\ +In case you want to install the optional deep learning dependencies (i.e. `poetry install --with deeplearning`), +you will need to do the following additional steps if you want `tensorflow` to detect your +GPU: + +```bash +# Instructions for tensorflow==2.12 +conda install -c conda-forge cudatoolkit=11.8.0 +mkdir -p $CONDA_PREFIX/etc/conda/activate.d +echo 'CUDNN_PATH=$(dirname $(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)"))' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh +echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/:$CUDNN_PATH/lib' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh +source $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh +# Verify install: +python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" +``` + +Then, at every use, re-run the command +`source $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh` (you can simply add this line to +your `.bashrc`). For more details, please refer to +[the official documentation](https://www.tensorflow.org/install/pip). 
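+
+If you also installed the PyTorch-based dependencies of the deep learning group
+(e.g. `braindecode` and `skorch`), a similar sanity check can be run. The snippet
+below is only a suggestion and assumes a CUDA-enabled build of `torch` is installed:
+
+```bash
+python3 -c "import torch; print(torch.cuda.is_available())"
+```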
+ ### Tools used MOABB uses [`poetry`](https://python-poetry.org/) for dependency management. This tool From ce990b2b75ca0b66b570245a1094b645ef2e5e46 Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Thu, 29 Jun 2023 14:29:29 +0200 Subject: [PATCH 15/64] Fix MNEBNCI data_path (#412) * Fix #383 * Update whats_new.rst --- docs/source/whats_new.rst | 1 + moabb/datasets/bnci.py | 67 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 2a414e484..58d960d2c 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -38,6 +38,7 @@ Bugs - Save parameters of :func:`moabb.dataset.PhysionetMI` (:gh:`403` by `Pierre Guetschel`_) - Fixing issue with parallel evaluation (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) - Fixing SSLError from BCI competition IV (:gh:`404` by `Bruno Aristimunha`_) +- Fixing :func:`moabb.dataset.bnci.MNEBNCI.data_path` that returned the data itself instead of paths (:gh:`412` by `Pierre Guetschel`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/bnci.py b/moabb/datasets/bnci.py index 51b4bdbb9..c8b80a402 100644 --- a/moabb/datasets/bnci.py +++ b/moabb/datasets/bnci.py @@ -29,6 +29,7 @@ def load_data( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): # noqa: D301 """Get paths to local copies of a BNCI dataset files. @@ -54,6 +55,9 @@ def load_data( update_path : bool | None If True, set the MNE_DATASETS_BNCI_PATH in mne-python config to the given path. If None, the user is prompted. + only_filenames : bool + If True, return only the local path of the files without + loading the data. verbose : bool, str, int, or None If not None, override default verbose level (see :func:`mne.verbose` and :ref:`Logging documentation ` for more). @@ -103,7 +107,13 @@ def load_data( ) return dataset_list[dataset]( - subject, path, force_update, update_path, baseurl_list[dataset], verbose + subject, + path, + force_update, + update_path, + baseurl_list[dataset], + only_filenames, + verbose, ) @@ -114,6 +124,7 @@ def _load_data_001_2014( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 001-2014 dataset.""" @@ -130,12 +141,18 @@ def _load_data_001_2014( ch_types = ["eeg"] * 22 + ["eog"] * 3 sessions = {} + filenames = [] for r in ["T", "E"]: url = "{u}001-2014/A{s:02d}{r}.mat".format(u=base_url, s=subject, r=r) filename = data_path(url, path, force_update, update_path) + filenames += filename + if only_filenames: + continue runs, ev = _convert_mi(filename[0], ch_names, ch_types) # FIXME: deal with run with no event (1:3) and name them sessions["session_%s" % r] = {"run_%d" % ii: run for ii, run in enumerate(runs)} + if only_filenames: + return filenames return sessions @@ -146,6 +163,7 @@ def _load_data_002_2014( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 002-2014 dataset.""" @@ -153,14 +171,19 @@ def _load_data_002_2014( raise ValueError("Subject must be between 1 and 14. Got %d." % subject) runs = [] + filenames = [] for r in ["T", "E"]: url = "{u}002-2014/S{s:02d}{r}.mat".format(u=base_url, s=subject, r=r) filename = data_path(url, path, force_update, update_path)[0] - + filenames.append(filename) + if only_filenames: + continue # FIXME: electrode position and name are not provided directly. 
raws, _ = _convert_mi(filename, None, ["eeg"] * 15) runs.extend(raws) + if only_filenames: + return filenames runs = {"run_%d" % ii: run for ii, run in enumerate(runs)} return {"session_0": runs} @@ -172,6 +195,7 @@ def _load_data_004_2014( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 004-2014 dataset.""" @@ -182,12 +206,18 @@ def _load_data_004_2014( ch_types = ["eeg"] * 3 + ["eog"] * 3 sessions = [] + filenames = [] for r in ["T", "E"]: url = "{u}004-2014/B{s:02d}{r}.mat".format(u=base_url, s=subject, r=r) filename = data_path(url, path, force_update, update_path)[0] + filenames.append(filename) + if only_filenames: + continue raws, _ = _convert_mi(filename, ch_names, ch_types) sessions.extend(raws) + if only_filenames: + return filenames sessions = {"session_%d" % ii: {"run_0": run} for ii, run in enumerate(sessions)} return sessions @@ -199,6 +229,7 @@ def _load_data_008_2014( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 008-2014 dataset.""" @@ -207,7 +238,8 @@ def _load_data_008_2014( url = "{u}008-2014/A{s:02d}.mat".format(u=base_url, s=subject) filename = data_path(url, path, force_update, update_path)[0] - + if only_filenames: + return [filename] run = loadmat(filename, struct_as_record=False, squeeze_me=True)["data"] raw, event_id = _convert_run_p300_sl(run, verbose=verbose) @@ -223,6 +255,7 @@ def _load_data_009_2014( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 009-2014 dataset.""" @@ -233,6 +266,8 @@ def _load_data_009_2014( # we load only grid speller data url = "{u}009-2014/A{s:02d}S.mat".format(u=base_url, s=subject) filename = data_path(url, path, force_update, update_path)[0] + if only_filenames: + return [filename] data = loadmat(filename, struct_as_record=False, squeeze_me=True)["data"] sess = [] @@ -259,6 +294,7 @@ def _load_data_001_2015( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 001-2015 dataset.""" @@ -279,11 +315,17 @@ def _load_data_001_2015( ch_types = ["eeg"] * 13 sessions = {} + filenames = [] for r in ses: url = "{u}001-2015/S{s:02d}{r}.mat".format(u=base_url, s=subject, r=r) filename = data_path(url, path, force_update, update_path) + filenames += filename + if only_filenames: + continue runs, ev = _convert_mi(filename[0], ch_names, ch_types) sessions["session_%s" % r] = {"run_%d" % ii: run for ii, run in enumerate(runs)} + if only_filenames: + return filenames return sessions @@ -294,6 +336,7 @@ def _load_data_003_2015( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 003-2015 dataset.""" @@ -302,6 +345,8 @@ def _load_data_003_2015( url = "{u}003-2015/s{s:d}.mat".format(u=base_url, s=subject) filename = data_path(url, path, force_update, update_path)[0] + if only_filenames: + return [filename] data = loadmat(filename, struct_as_record=False, squeeze_me=True) data = data["s%d" % subject] @@ -350,6 +395,7 @@ def _load_data_004_2015( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 004-2015 dataset.""" @@ -360,6 +406,8 @@ def _load_data_004_2015( url = "{u}004-2015/{s}.mat".format(u=base_url, s=subjects[subject - 1]) filename = data_path(url, path, force_update, update_path)[0] + if only_filenames: + return [filename] # fmt: off ch_names = [ @@ -381,6 
+429,7 @@ def _load_data_009_2015( force_update=False, update_path=None, base_url=BBCI_URL, + only_filenames=False, verbose=None, ): """Load data for 009-2015 dataset.""" @@ -396,6 +445,8 @@ def _load_data_009_2015( s = subjects[subject - 1] url = "{u}BNCIHorizon2020-AMUSE/AMUSE_VP{s}.mat".format(u=base_url, s=s) filename = data_path(url, path, force_update, update_path)[0] + if only_filenames: + return [filename] ch_types = ["eeg"] * 60 + ["eog"] * 2 @@ -409,6 +460,7 @@ def _load_data_010_2015( force_update=False, update_path=None, base_url=BBCI_URL, + only_filenames=False, verbose=None, ): """Load data for 010-2015 dataset.""" @@ -425,6 +477,8 @@ def _load_data_010_2015( s = subjects[subject - 1] url = "{u}BNCIHorizon2020-RSVP/RSVP_VP{s}.mat".format(u=base_url, s=s) filename = data_path(url, path, force_update, update_path)[0] + if only_filenames: + return [filename] ch_types = ["eeg"] * 63 @@ -438,6 +492,7 @@ def _load_data_012_2015( force_update=False, update_path=None, base_url=BBCI_URL, + only_filenames=False, verbose=None, ): """Load data for 012-2015 dataset.""" @@ -449,6 +504,8 @@ def _load_data_012_2015( s = subjects[subject - 1] url = "{u}BNCIHorizon2020-PASS2D/PASS2D_VP{s}.mat".format(u=base_url, s=s) filename = data_path(url, path, force_update, update_path)[0] + if only_filenames: + return [filename] ch_types = ["eeg"] * 63 @@ -462,6 +519,7 @@ def _load_data_013_2015( force_update=False, update_path=None, base_url=BNCI_URL, + only_filenames=False, verbose=None, ): """Load data for 013-2015 dataset.""" @@ -472,6 +530,8 @@ def _load_data_013_2015( for r in ["s1", "s2"]: url = "{u}013-2015/Subject{s:02d}_{r}.mat".format(u=base_url, s=subject, r=r) data_paths.extend(data_path(url, path, force_update, update_path)) + if only_filenames: + return data_paths raws = [] event_id = {} @@ -674,6 +734,7 @@ def data_path( update_path=update_path, path=path, force_update=force_update, + only_filenames=True, ) From 00c1e1248dcad477d19573bbb59a501f35a56a7a Mon Sep 17 00:00:00 2001 From: Bru Date: Mon, 3 Jul 2023 16:38:38 +0200 Subject: [PATCH 16/64] Fixing small import issue (#414) * Updating README.md * Fixing the import with fakedaset --- docs/source/whats_new.rst | 2 ++ moabb/datasets/__init__.py | 1 + 2 files changed, 3 insertions(+) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 58d960d2c..43274be37 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -39,6 +39,8 @@ Bugs - Fixing issue with parallel evaluation (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) - Fixing SSLError from BCI competition IV (:gh:`404` by `Bruno Aristimunha`_) - Fixing :func:`moabb.dataset.bnci.MNEBNCI.data_path` that returned the data itself instead of paths (:gh:`412` by `Pierre Guetschel`_) +- Adding :func:`moabb.datasets.fake` in the init file to use in braindecode object (:gh:`414` by `Bruno Aristimunha`_) + API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index d1ad513aa..dfed77561 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -30,6 +30,7 @@ bi2015b, ) from .epfl import EPFLP300 +from .fake import FakeDataset, FakeVirtualRealityDataset from .gigadb import Cho2017 from .huebner_llp import Huebner2017, Huebner2018 from .Lee2019 import Lee2019_ERP, Lee2019_MI, Lee2019_SSVEP From 75217a870b3d0a554faf2a3f1972034d2b2edc2d Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Tue, 4 Jul 2023 15:46:34 +0200 Subject: [PATCH 17/64] Resample before DL 
pipelines (#406) * Test impact of resample on EEGNet * Update pipeline name * Solve the resampling for Both keras and Pytorch model. The resampling frequency are set to: EEGNetv4: 128 Deep4Net/DeepConvNet: 250 ShallowFBCSPNet: 250 EEGInception: 128 EEGTCNet: 250 EEGNeX: 128 I have changed the implementation of Keras Pipeline, Data are loaded as Epoch, resample and than converted to numpy array. * Reducing the epochs and batch in the examples * Change the parameter for Keras DL - Epoch 300 - Patience 75 * Change the parameter for Keras DL - Epoch 300 - Patience 75 * Change the parameter for Keras DL - Epoch 300 - Patience 75 - Lr 0.001 * Delete EEGNet_resample (used for testing) * enh: correct CI error with NoneType not subscriptable * fix: doc build CI, correct save model error * Changing the name --------- Co-authored-by: CARRARA Igor Co-authored-by: Bru Co-authored-by: Sylvain Chevallier Co-authored-by: Sylvain Chevallier --- .../{plot_load_model.py => load_model.py} | 0 examples/pipelines_DL/Keras_DeepConvNet.yml | 10 ++++- examples/pipelines_DL/Keras_EEGITNet.yml | 10 ++++- examples/pipelines_DL/Keras_EEGNeX.yml | 10 ++++- examples/pipelines_DL/Keras_EEGNet_8_2.yml | 10 ++++- examples/pipelines_DL/Keras_EEGTCNet.yml | 10 ++++- .../pipelines_DL/Keras_ShallowConvNet.yml | 10 ++++- .../braindecode_EEGInception.py | 15 +++++-- .../braindecode_EEGNetv4.py | 15 +++++-- .../braindecode_ShallowFBCSPNet.py | 16 +++++--- examples/pipelines_save/Keras_DeepConvNet.yml | 40 +++++++++++++++++++ moabb/benchmark.py | 2 +- moabb/evaluations/evaluations.py | 14 +++---- moabb/pipelines/features.py | 33 +++++++++++++++ pipelines/Keras_DeepConvNet.yml | 16 ++++++-- pipelines/Keras_EEGITNet.yml | 16 ++++++-- pipelines/Keras_EEGNeX.yml | 16 ++++++-- pipelines/Keras_EEGNet_8_2.yml | 16 ++++++-- pipelines/Keras_EEGTCNet.yml | 16 ++++++-- pipelines/Keras_ShallowConvNet.yml | 16 ++++++-- pipelines/braindecode_Deep4Net.py | 9 ++++- pipelines/braindecode_EEGInception.py | 9 ++++- pipelines/braindecode_EEGNetv4.py | 11 ++++- pipelines/braindecode_ShallowFBCSPNet.py | 9 ++++- 24 files changed, 273 insertions(+), 56 deletions(-) rename examples/{plot_load_model.py => load_model.py} (100%) create mode 100644 examples/pipelines_save/Keras_DeepConvNet.yml diff --git a/examples/plot_load_model.py b/examples/load_model.py similarity index 100% rename from examples/plot_load_model.py rename to examples/load_model.py diff --git a/examples/pipelines_DL/Keras_DeepConvNet.yml b/examples/pipelines_DL/Keras_DeepConvNet.yml index d94376a8a..911b6c21b 100644 --- a/examples/pipelines_DL/Keras_DeepConvNet.yml +++ b/examples/pipelines_DL/Keras_DeepConvNet.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1002/hbm.23730 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 250 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,7 +27,7 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 + learning_rate: 0.001 epochs: 2 batch_size: 1 verbose: 0 diff --git a/examples/pipelines_DL/Keras_EEGITNet.yml b/examples/pipelines_DL/Keras_EEGITNet.yml index c8081dffe..a625a9413 100644 --- a/examples/pipelines_DL/Keras_EEGITNet.yml +++ b/examples/pipelines_DL/Keras_EEGITNet.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1109/ACCESS.2022.3161489 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 128 + + - name: Convert_Epoch_Array + 
from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,7 +27,7 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 + learning_rate: 0.001 epochs: 2 batch_size: 1 verbose: 0 diff --git a/examples/pipelines_DL/Keras_EEGNeX.yml b/examples/pipelines_DL/Keras_EEGNeX.yml index c52edbd6c..c9da8efec 100644 --- a/examples/pipelines_DL/Keras_EEGNeX.yml +++ b/examples/pipelines_DL/Keras_EEGNeX.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.48550/arXiv.2207.12369 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 128 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,7 +27,7 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 + learning_rate: 0.001 epochs: 2 batch_size: 1 verbose: 0 diff --git a/examples/pipelines_DL/Keras_EEGNet_8_2.yml b/examples/pipelines_DL/Keras_EEGNet_8_2.yml index 25604b3a2..28a4340b6 100644 --- a/examples/pipelines_DL/Keras_EEGNet_8_2.yml +++ b/examples/pipelines_DL/Keras_EEGNet_8_2.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1088/1741-2552/aace8c pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 128 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,7 +27,7 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 + learning_rate: 0.001 epochs: 2 batch_size: 1 verbose: 0 diff --git a/examples/pipelines_DL/Keras_EEGTCNet.yml b/examples/pipelines_DL/Keras_EEGTCNet.yml index ddb00ac4f..8008283c6 100644 --- a/examples/pipelines_DL/Keras_EEGTCNet.yml +++ b/examples/pipelines_DL/Keras_EEGTCNet.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1109/SMC42975.2020.9283028 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 250 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,7 +27,7 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 + learning_rate: 0.001 epochs: 2 batch_size: 1 verbose: 0 diff --git a/examples/pipelines_DL/Keras_ShallowConvNet.yml b/examples/pipelines_DL/Keras_ShallowConvNet.yml index 39936d0af..52f609dce 100644 --- a/examples/pipelines_DL/Keras_ShallowConvNet.yml +++ b/examples/pipelines_DL/Keras_ShallowConvNet.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1002/hbm.23730 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 250 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,7 +27,7 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 + learning_rate: 0.001 epochs: 2 batch_size: 1 verbose: 0 diff --git a/examples/pipelines_braindecode/braindecode_EEGInception.py b/examples/pipelines_braindecode/braindecode_EEGInception.py index 9fd6c5b60..b18e8033f 100644 --- a/examples/pipelines_braindecode/braindecode_EEGInception.py +++ b/examples/pipelines_braindecode/braindecode_EEGInception.py @@ -5,6 +5,7 @@ from skorch.callbacks import EarlyStopping, EpochScoring from skorch.dataset import ValidSplit +from moabb.pipelines.features import Resampler_Epoch from 
moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG @@ -22,10 +23,10 @@ PATIENCE = 3 # Create the dataset -create_dataset = BraindecodeDatasetLoader(drop_last_window=False) +create_dataset = BraindecodeDatasetLoader() -# Set EEG Inception model -model = EEGInception(in_channels=1, n_classes=2) +# Set random Model +model = EEGInception(in_channels=1, n_classes=2, input_window_samples=100) # Define a Skorch classifier clf = EEGClassifier( @@ -53,7 +54,13 @@ ) # Create the pipelines -pipes = Pipeline([("Braindecode_dataset", create_dataset), ("EEGInception", clf)]) +pipes = Pipeline( + [ + ("resample", Resampler_Epoch(128)), + ("braindecode_dataset", create_dataset), + ("EEGInception", clf), + ] +) # this is what will be loaded PIPELINE = { diff --git a/examples/pipelines_braindecode/braindecode_EEGNetv4.py b/examples/pipelines_braindecode/braindecode_EEGNetv4.py index e37844c02..3aba86787 100644 --- a/examples/pipelines_braindecode/braindecode_EEGNetv4.py +++ b/examples/pipelines_braindecode/braindecode_EEGNetv4.py @@ -5,6 +5,7 @@ from skorch.callbacks import EarlyStopping, EpochScoring from skorch.dataset import ValidSplit +from moabb.pipelines.features import Resampler_Epoch from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG @@ -22,9 +23,9 @@ PATIENCE = 3 # Create the dataset -create_dataset = BraindecodeDatasetLoader(drop_last_window=False) +create_dataset = BraindecodeDatasetLoader() -# Set EEGNetv4 model +# Set random Model model = EEGNetv4(in_chans=1, n_classes=2, input_window_samples=100) # Define a Skorch classifier @@ -53,11 +54,17 @@ ) # Create the pipelines -pipes = Pipeline([("braindecode_dataset", create_dataset), ("EEGNetv4", clf)]) +pipes = Pipeline( + [ + ("resample", Resampler_Epoch(128)), + ("braindecode_dataset", create_dataset), + ("EEGNetv4", clf), + ] +) # this is what will be loaded PIPELINE = { - "name": "braindecode_EEGNetv4", + "name": "braindecode_EEGNetv4_resample", "paradigms": ["LeftRightImagery", "MotorImagery"], "pipeline": pipes, "citations": "https://doi.org/10.1088/1741-2552/aace8c", diff --git a/examples/pipelines_braindecode/braindecode_ShallowFBCSPNet.py b/examples/pipelines_braindecode/braindecode_ShallowFBCSPNet.py index be19a5cf5..79c8f2bf8 100644 --- a/examples/pipelines_braindecode/braindecode_ShallowFBCSPNet.py +++ b/examples/pipelines_braindecode/braindecode_ShallowFBCSPNet.py @@ -5,6 +5,7 @@ from skorch.callbacks import EarlyStopping, EpochScoring from skorch.dataset import ValidSplit +from moabb.pipelines.features import Resampler_Epoch from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG @@ -18,13 +19,13 @@ BATCH_SIZE = 64 SEED = 42 VERBOSE = 1 -EPOCH = 10 +EPOCH = 5 PATIENCE = 3 # Create the dataset -create_dataset = BraindecodeDatasetLoader(drop_last_window=False) +create_dataset = BraindecodeDatasetLoader() -# Set Shallow Filter Bank CSP Net model +# Set random Model model = ShallowFBCSPNet( in_chans=1, n_classes=2, input_window_samples=100, final_conv_length="auto" ) @@ -55,12 +56,17 @@ ) # Create the pipelines -pipes = Pipeline([("braindecode_dataset", create_dataset), ("ShallowFBCSPNet", clf)]) +pipes = Pipeline( + [ + ("resample", Resampler_Epoch(250)), + ("braindecode_dataset", create_dataset), + ("ShallowFBCSPNet", clf), + ] +) # this is what will be loaded PIPELINE = { "name": "braindecode_ShallowFBCSPNet", "paradigms": ["LeftRightImagery", "MotorImagery"], "pipeline": pipes, - "citations": "https://doi.org/10.1002/hbm.23730", } diff 
--git a/examples/pipelines_save/Keras_DeepConvNet.yml b/examples/pipelines_save/Keras_DeepConvNet.yml new file mode 100644 index 000000000..4fb5ce5ee --- /dev/null +++ b/examples/pipelines_save/Keras_DeepConvNet.yml @@ -0,0 +1,40 @@ +name: Keras_DeepConvNet + +paradigms: + - LeftRightImagery + - MotorImagery + +citations: + - https://doi.org/10.1002/hbm.23730 + +pipeline: + - name: StandardScaler_Epoch + from: moabb.pipelines.features + + - name: KerasDeepConvNet + from: moabb.pipelines.deep_learning + parameters: + loss: "sparse_categorical_crossentropy" + optimizer: + - name: Adam + from: tensorflow.keras.optimizers.legacy + parameters: + learning_rate: 0.001 + epochs: 2 + batch_size: 1 + verbose: 0 + random_state: 42 + validation_split: 0.2 + callbacks: + - name: EarlyStopping + from: tensorflow.keras.callbacks + parameters: + monitor: "val_loss" + patience: 300 + + - name: ReduceLROnPlateau + from: tensorflow.keras.callbacks + parameters: + monitor: "val_loss" + patience: 300 + factor: 0.5 diff --git a/moabb/benchmark.py b/moabb/benchmark.py index 514e09ea7..5aae3a505 100644 --- a/moabb/benchmark.py +++ b/moabb/benchmark.py @@ -164,7 +164,7 @@ def benchmark( # noqa: C901 ppl_with_epochs, ppl_with_array = {}, {} for pn, pv in prdgms[paradigm].items(): - if "braindecode" in pn: + if "braindecode" in pn or "Keras" in pn: ppl_with_epochs[pn] = pv else: ppl_with_array[pn] = pv diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index 2e042cb10..3d8f94cab 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -278,19 +278,19 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): return_estimator=True, ) score = results["test_score"].mean() + if self.hdf5_path is not None: + save_model_list( + results["estimator"], + score_list=results["test_score"], + save_path=model_save_path, + ) + if _carbonfootprint: emissions = tracker.stop() if emissions is None: emissions = np.NaN duration = time() - t_start - if self.hdf5_path is not None: - save_model_list( - results["estimator"], - score_list=results["test_score"], - save_path=model_save_path, - ) - nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { "time": duration / 5.0, # 5 fold CV diff --git a/moabb/pipelines/features.py b/moabb/pipelines/features.py index fc4641ea3..329485904 100644 --- a/moabb/pipelines/features.py +++ b/moabb/pipelines/features.py @@ -1,3 +1,4 @@ +import mne import numpy as np import scipy.signal as signal from sklearn.base import BaseEstimator, TransformerMixin @@ -124,3 +125,35 @@ def transform(self, X): X_fin = np.array(X_fin) return X_fin + + +class Resampler_Epoch(BaseEstimator, TransformerMixin): + """ + Function that copies and resamples an epochs object + """ + + def __init__(self, sfreq): + self.sfreq = sfreq + + def fit(self, X, y): + return self + + def transform(self, X: mne.Epochs): + X = X.copy() + X.resample(self.sfreq) + return X + + +class Convert_Epoch_Array(BaseEstimator, TransformerMixin): + """ + Function that copies and resamples an epochs object + """ + + def __init__(self): + """Init.""" + + def fit(self, X, y): + return self + + def transform(self, X: mne.Epochs): + return X.get_data() diff --git a/pipelines/Keras_DeepConvNet.yml b/pipelines/Keras_DeepConvNet.yml index 31be8dc24..27d3fd05e 100644 --- a/pipelines/Keras_DeepConvNet.yml +++ b/pipelines/Keras_DeepConvNet.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1002/hbm.23730 pipeline: + - name: Resampler_Epoch + from: 
moabb.pipelines.features + parameters: + sfreq: 250 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,8 +27,8 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 - epochs: 1000 + learning_rate: 0.001 + epochs: 300 batch_size: 64 verbose: 0 random_state: 42 @@ -30,11 +38,11 @@ pipeline: from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 - name: ReduceLROnPlateau from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 factor: 0.5 diff --git a/pipelines/Keras_EEGITNet.yml b/pipelines/Keras_EEGITNet.yml index 39c01df94..22b2f2286 100644 --- a/pipelines/Keras_EEGITNet.yml +++ b/pipelines/Keras_EEGITNet.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1109/ACCESS.2022.3161489 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 128 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,8 +27,8 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 - epochs: 1000 + learning_rate: 0.001 + epochs: 300 batch_size: 64 verbose: 0 random_state: 42 @@ -30,11 +38,11 @@ pipeline: from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 - name: ReduceLROnPlateau from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 factor: 0.5 diff --git a/pipelines/Keras_EEGNeX.yml b/pipelines/Keras_EEGNeX.yml index 64634d07e..cfeba7062 100644 --- a/pipelines/Keras_EEGNeX.yml +++ b/pipelines/Keras_EEGNeX.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.48550/arXiv.2207.12369 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 128 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,8 +27,8 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 - epochs: 1000 + learning_rate: 0.001 + epochs: 300 batch_size: 64 verbose: 0 random_state: 42 @@ -30,11 +38,11 @@ pipeline: from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 - name: ReduceLROnPlateau from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 factor: 0.5 diff --git a/pipelines/Keras_EEGNet_8_2.yml b/pipelines/Keras_EEGNet_8_2.yml index abf68e14f..ea17ce2a3 100644 --- a/pipelines/Keras_EEGNet_8_2.yml +++ b/pipelines/Keras_EEGNet_8_2.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1088/1741-2552/aace8c pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 128 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,8 +27,8 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 - epochs: 1000 + learning_rate: 0.001 + epochs: 300 batch_size: 64 verbose: 0 random_state: 42 @@ -30,11 +38,11 @@ pipeline: from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 - name: ReduceLROnPlateau from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 factor: 0.5 diff --git a/pipelines/Keras_EEGTCNet.yml 
b/pipelines/Keras_EEGTCNet.yml index c88f8783f..97244c498 100644 --- a/pipelines/Keras_EEGTCNet.yml +++ b/pipelines/Keras_EEGTCNet.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1109/SMC42975.2020.9283028 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 250 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,8 +27,8 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 - epochs: 1000 + learning_rate: 0.001 + epochs: 300 batch_size: 64 verbose: 0 random_state: 42 @@ -30,11 +38,11 @@ pipeline: from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 - name: ReduceLROnPlateau from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 factor: 0.5 diff --git a/pipelines/Keras_ShallowConvNet.yml b/pipelines/Keras_ShallowConvNet.yml index c7c2ff9d4..ae6a28979 100644 --- a/pipelines/Keras_ShallowConvNet.yml +++ b/pipelines/Keras_ShallowConvNet.yml @@ -8,6 +8,14 @@ citations: - https://doi.org/10.1002/hbm.23730 pipeline: + - name: Resampler_Epoch + from: moabb.pipelines.features + parameters: + sfreq: 250 + + - name: Convert_Epoch_Array + from: moabb.pipelines.features + - name: StandardScaler_Epoch from: moabb.pipelines.features @@ -19,8 +27,8 @@ pipeline: - name: Adam from: tensorflow.keras.optimizers.legacy parameters: - learning_rate: 0.0009 - epochs: 1000 + learning_rate: 0.001 + epochs: 300 batch_size: 64 verbose: 0 random_state: 42 @@ -30,11 +38,11 @@ pipeline: from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 - name: ReduceLROnPlateau from: tensorflow.keras.callbacks parameters: monitor: "val_loss" - patience: 300 + patience: 75 factor: 0.5 diff --git a/pipelines/braindecode_Deep4Net.py b/pipelines/braindecode_Deep4Net.py index 4b7247905..9a502534d 100644 --- a/pipelines/braindecode_Deep4Net.py +++ b/pipelines/braindecode_Deep4Net.py @@ -5,6 +5,7 @@ from skorch.callbacks import EarlyStopping, EpochScoring from skorch.dataset import ValidSplit +from moabb.pipelines.features import Resampler_Epoch from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG @@ -55,7 +56,13 @@ ) # Create the pipelines -pipes = Pipeline([("braindecode_dataset", create_dataset), ("Deep4Net", clf)]) +pipes = Pipeline( + [ + ("resample", Resampler_Epoch(250)), + ("braindecode_dataset", create_dataset), + ("Deep4Net", clf), + ] +) # this is what will be loaded PIPELINE = { diff --git a/pipelines/braindecode_EEGInception.py b/pipelines/braindecode_EEGInception.py index 78ecc3596..12b711082 100644 --- a/pipelines/braindecode_EEGInception.py +++ b/pipelines/braindecode_EEGInception.py @@ -5,6 +5,7 @@ from skorch.callbacks import EarlyStopping, EpochScoring from skorch.dataset import ValidSplit +from moabb.pipelines.features import Resampler_Epoch from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG @@ -53,7 +54,13 @@ ) # Create the pipelines -pipes = Pipeline([("Braindecode_dataset", create_dataset), ("EEGInception", clf)]) +pipes = Pipeline( + [ + ("resample", Resampler_Epoch(128)), + ("braindecode_dataset", create_dataset), + ("EEGInception", clf), + ] +) # this is what will be loaded PIPELINE = { diff --git a/pipelines/braindecode_EEGNetv4.py b/pipelines/braindecode_EEGNetv4.py index a9b5d9d75..af26b3a8f 100644 --- a/pipelines/braindecode_EEGNetv4.py +++ 
b/pipelines/braindecode_EEGNetv4.py @@ -5,6 +5,7 @@ from skorch.callbacks import EarlyStopping, EpochScoring from skorch.dataset import ValidSplit +from moabb.pipelines.features import Resampler_Epoch from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG @@ -53,11 +54,17 @@ ) # Create the pipelines -pipes = Pipeline([("braindecode_dataset", create_dataset), ("EEGNetv4", clf)]) +pipes = Pipeline( + [ + ("resample", Resampler_Epoch(128)), + ("braindecode_dataset", create_dataset), + ("EEGNetv4", clf), + ] +) # this is what will be loaded PIPELINE = { - "name": "braindecode_EEGNetv4", + "name": "braindecode_EEGNetv4_resample", "paradigms": ["LeftRightImagery", "MotorImagery"], "pipeline": pipes, "citations": "https://doi.org/10.1088/1741-2552/aace8c", diff --git a/pipelines/braindecode_ShallowFBCSPNet.py b/pipelines/braindecode_ShallowFBCSPNet.py index 528f281bb..f9d42952f 100644 --- a/pipelines/braindecode_ShallowFBCSPNet.py +++ b/pipelines/braindecode_ShallowFBCSPNet.py @@ -5,6 +5,7 @@ from skorch.callbacks import EarlyStopping, EpochScoring from skorch.dataset import ValidSplit +from moabb.pipelines.features import Resampler_Epoch from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG @@ -55,7 +56,13 @@ ) # Create the pipelines -pipes = Pipeline([("braindecode_dataset", create_dataset), ("ShallowFBCSPNet", clf)]) +pipes = Pipeline( + [ + ("resample", Resampler_Epoch(250)), + ("braindecode_dataset", create_dataset), + ("ShallowFBCSPNet", clf), + ] +) # this is what will be loaded PIPELINE = { From e3b520a4215c088df5eb2eaee7c45c18689b26af Mon Sep 17 00:00:00 2001 From: gcattan Date: Sat, 8 Jul 2023 16:08:11 +0200 Subject: [PATCH 18/64] CompoundDataset (#410) * Go shopping dataset (#3) * Introduce Go-Shopping dataset: tutorial, tests, and children with bi illiteracy * add block_rep method * add spd method to restingstate * fix number of subject in VirtualReality * [pre-commit.ci] auto fixes from pre-commit.com hooks * flake8 * flake8 bis * Rename GoShopping by CompoundDataset * [pre-commit.ci] auto fixes from pre-commit.com hooks * missing modification * fix dataset_list empty * [pre-commit.ci] auto fixes from pre-commit.com hooks --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Bru --- docs/source/dataset_summary.rst | 21 ++- docs/source/datasets.rst | 24 +++ examples/plot_phmd_ml_spectrum.py | 4 +- moabb/datasets/__init__.py | 2 + moabb/datasets/braininvaders.py | 35 ++-- moabb/datasets/compound_dataset/__init__.py | 10 ++ moabb/datasets/compound_dataset/base.py | 119 ++++++++++++++ .../compound_dataset/bi_illiteracy.py | 150 ++++++++++++++++++ moabb/datasets/fake.py | 3 +- moabb/datasets/utils.py | 24 ++- moabb/paradigms/resting_state.py | 8 + moabb/tests/datasets.py | 100 +++++++++++- .../tutorial_5_build_a_custom_dataset.py | 126 +++++++++++++++ 13 files changed, 593 insertions(+), 33 deletions(-) create mode 100644 moabb/datasets/compound_dataset/__init__.py create mode 100644 moabb/datasets/compound_dataset/base.py create mode 100644 moabb/datasets/compound_dataset/bi_illiteracy.py create mode 100644 tutorials/tutorial_5_build_a_custom_dataset.py diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst index 2f02e1276..b1986908e 100644 --- a/docs/source/dataset_summary.rst +++ b/docs/source/dataset_summary.rst @@ -53,7 +53,7 @@ P300/ERP bi2014b, 37, 32, 200 NT / 40 T, 1s, 512Hz, 3 bi2015a, 43, 32, 4131 NT / 825 T, 1s, 512Hz, 3 
bi2015b, 44, 32, 2160 NT / 480 T, 1s, 512Hz, 2 - VirtualReality, 24, 16, 600 NT / 120 T, 1s, 512Hz, 2 + VirtualReality, 21, 16, 600 NT / 120 T, 1s, 512Hz, 2 Huebner2017, 13, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 Huebner2018, 12, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 Sosulski2019, 13, 31, 75 NT / 15 T, , 1000Hz, 3 @@ -92,6 +92,25 @@ is a resting state experiment. HeadMountedDisplay,12,16,2,10,60s,512Hz,1 +Compound Datasets +====================== + +Compound Datasets are datasets compounded with subjects from other datasets. +It is useful for merging different datasets (including other Compound Datasets), +select a sample of subject inside a dataset (e.g. subject with high/low performance). + +.. csv-table:: + :header: Dataset, #Subj, #Original datasets + :class: sortable + + bi2014a_il,17,bi2014a + bi2014b_il,11,bi2014b + bi2015a_il,2,bi2015a + bi2015b_il,25,bi2015b + VirtualReality_il,4,VirtualReality + biIlliteracy,59,bi2014a_il bi2014b_il bi2015a_il bi2015b_il VirtualReality_il + + Submit a new dataset ~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index 345496196..2d8825689 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -112,3 +112,27 @@ Base & Utils download.fs_get_file_name utils.dataset_search utils.find_intersecting_channels + + +==================== +Compound Datasets +==================== + +.. automodule:: moabb.datasets.compound_dataset + +.. currentmodule:: moabb.datasets.compound_dataset + +------------ +ERP Datasets +------------ + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + bi2014a_il + bi2014b_il + bi2015a_il + bi2015b_il + VirtualReality_il + biIlliteracy diff --git a/examples/plot_phmd_ml_spectrum.py b/examples/plot_phmd_ml_spectrum.py index 722f7c91a..aee76c249 100644 --- a/examples/plot_phmd_ml_spectrum.py +++ b/examples/plot_phmd_ml_spectrum.py @@ -17,7 +17,6 @@ import matplotlib.pyplot as plt import numpy as np -from scipy.signal import welch from moabb.datasets import HeadMountedDisplay from moabb.paradigms import RestingStateToP300Adapter @@ -50,8 +49,7 @@ # 1) Obtain the epochs for the specified subject. # 2) Use Welch's method to estimate the power spectral density. -X, y, _ = paradigm.get_data(dataset, [subject]) -f, S = welch(X, axis=-1, nperseg=1024, fs=paradigm.resample) +f, S, _, y = paradigm.psd(subject, dataset) ############################################################################### # Display of the data diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index dfed77561..52de2b897 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -7,6 +7,8 @@ See https://github.com/NeuroTechX/moabb/wiki/Datasets-Support for detail on datasets (electrodes, number of trials, sessions, etc.) """ +from . 
import compound_dataset + # flake8: noqa from .alex_mi import AlexMI from .bbci_eeg_fnirs import Shin2017A, Shin2017B diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py index d32787912..d0cbd0189 100644 --- a/moabb/datasets/braininvaders.py +++ b/moabb/datasets/braininvaders.py @@ -15,6 +15,7 @@ from moabb.datasets import download as dl from moabb.datasets.base import BaseDataset +from moabb.datasets.utils import block_rep BI2012a_URL = "https://zenodo.org/record/2649069/files/" @@ -217,9 +218,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 end = idx_repetEndin_local[j + 1] Xbij = Xbi[:, start:end] raw = mne.io.RawArray(data=Xbij, info=info, verbose=False) - sessions[session_name][ - "block_" + str(bi + 1) + "-repetition_" + str(j + 1) - ] = raw + sessions[session_name][block_rep(bi + 1, j + 1)] = raw return sessions @@ -818,10 +817,9 @@ class VirtualReality(BaseDataset): ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - VirtualReality 24 16 600 NT / 120 T 1s 512Hz 2 + VirtualReality 21 16 600 NT / 120 T 1s 512Hz 2 ================ ======= ======= ================ =============== =============== =========== - We describe the experimental procedures for a dataset that we have made publicly available at https://doi.org/10.5281/zenodo.2605204 in mat (Mathworks, Natick, USA) and csv formats [1]_. This dataset contains electroencephalographic recordings on 21 @@ -836,6 +834,9 @@ class VirtualReality(BaseDataset): does not include any electronics at the exception of a smartphone. A full description of the experiment is available at https://hal.archives-ouvertes.fr/hal-02078533. + See the example `plot_vr_pc_p300_different_epoch_size` to compare the performance + between PC and VR. 
+ Parameters ---------- virtual_reality: bool (default False) @@ -859,7 +860,7 @@ class VirtualReality(BaseDataset): def __init__(self, virtual_reality=False, screen_display=True): super().__init__( - subjects=list(range(1, 20 + 1)), + subjects=list(range(1, 21 + 1)), sessions_per_subject=1, events=dict(Target=2, NonTarget=1), code="P300-VR", @@ -920,24 +921,10 @@ def get_block_repetition(self, paradigm, subjects, block_list, repetition_list): meta_select = [] for block in block_list: for repetition in repetition_list: - X_select.append( - X[ - meta["run"] - == "block_" + str(block) + "-repetition_" + str(repetition) - ] - ) - labels_select.append( - labels[ - meta["run"] - == "block_" + str(block) + "-repetition_" + str(repetition) - ] - ) - meta_select.append( - meta[ - meta["run"] - == "block_" + str(block) + "-repetition_" + str(repetition) - ] - ) + run = block_rep(block, repetition) + X_select.append(X[meta["run"] == run]) + labels_select.append(labels[meta["run"] == run]) + meta_select.append(meta[meta["run"] == run]) X_select = np.concatenate(X_select) labels_select = np.concatenate(labels_select) meta_select = np.concatenate(meta_select) diff --git a/moabb/datasets/compound_dataset/__init__.py b/moabb/datasets/compound_dataset/__init__.py new file mode 100644 index 000000000..c7a6ffecd --- /dev/null +++ b/moabb/datasets/compound_dataset/__init__.py @@ -0,0 +1,10 @@ +# flake8: noqa +from .base import CompoundDataset +from .bi_illiteracy import ( + VirtualReality_il, + bi2014a_il, + bi2014b_il, + bi2015a_il, + bi2015b_il, + biIlliteracy, +) diff --git a/moabb/datasets/compound_dataset/base.py b/moabb/datasets/compound_dataset/base.py new file mode 100644 index 000000000..77ebde66a --- /dev/null +++ b/moabb/datasets/compound_dataset/base.py @@ -0,0 +1,119 @@ +""" +Build a custom dataset using subjects from other datasets. +""" + +from ..base import BaseDataset + + +class CompoundDataset(BaseDataset): + """With this dataset, you can merge different dataset + by selecting among subjects in all datasets + to build a custom dataset. + + + Parameters + ---------- + subjects_list: List[Union[tuple, CompoundDataset]] + A list of subject or CompoundDataset (exclusive). + Example, with a list of selected subject: + [ + (bi2013(), 1, "session_0", "run_0") + (bi2014(), 1, "session_0", None) + ] + Example of building a dataset compounded of CompoundDatasets: + [ + CompoundDataset(subjects_list1), + CompoundDataset(subjects_list2) + ] + + sessions_per_subject: int + Number of sessions per subject (if varying, take minimum) + + events: dict of strings + String codes for events matched with labels in the stim channel. + See `BaseDataset`. + + code: string + Unique identifier for dataset, used in all plots + + interval: list with 2 entries + See `BaseDataset`. 
+ + paradigm: ['p300','imagery', 'ssvep', 'rstate'] + Defines what sort of dataset this is + """ + + def __init__( + self, subjects_list: list, events: dict, code: str, interval: list, paradigm: str + ): + self._set_subjects_list(subjects_list) + super().__init__( + subjects=list(range(1, self.count + 1)), + sessions_per_subject=self._get_sessions_per_subject(), + events=events, + code=code, + interval=interval, + paradigm=paradigm, + ) + + @property + def count(self): + return len(self.subjects_list) + + def _get_sessions_per_subject(self): + n_sessions = -1 + for value in self.subjects_list: + sessions = value[2] + size = len(sessions) if isinstance(sessions, list) else 1 + if sessions is None: + dataset = value[0] + size = dataset.n_sessions + if n_sessions == -1: + n_sessions = size + else: + n_sessions = min(n_sessions, size) + return n_sessions + + def _set_subjects_list(self, subjects_list: list): + if isinstance(subjects_list[0], tuple): + self.subjects_list = subjects_list + else: + self.subjects_list = [] + for compoundDataset in subjects_list: + self.subjects_list.extend(compoundDataset.subjects_list) + + def _get_single_subject_data(self, shopped_subject): + """return data for a single subject""" + dataset, subject, sessions, runs = self.subjects_list[shopped_subject - 1] + subject_data = dataset._get_single_subject_data(subject) + if sessions is None: + return subject_data + elif isinstance(sessions, list): + sessions_data = {f"{session}": subject_data[session] for session in sessions} + else: + sessions_data = {f"{sessions}": subject_data[sessions]} + + if runs is None: + return sessions_data + elif isinstance(runs, list): + for session in sessions_data.keys(): + sessions_data[session] = { + f"{run}": sessions_data[session][run] for run in runs + } + return sessions_data + else: + for session in sessions_data.keys(): + sessions_data[session] = {f"{runs}": sessions_data[session][runs]} + return sessions_data + + def data_path( + self, + shopped_subject, + path=None, + force_update=False, + update_path=None, + verbose=None, + ): + dataset, subject, _, _ = self.subjects_list[shopped_subject - 1] + path = dataset.data_path(subject) + return path diff --git a/moabb/datasets/compound_dataset/bi_illiteracy.py b/moabb/datasets/compound_dataset/bi_illiteracy.py new file mode 100644 index 000000000..f973b75cb --- /dev/null +++ b/moabb/datasets/compound_dataset/bi_illiteracy.py @@ -0,0 +1,150 @@ +from ..braininvaders import VirtualReality, bi2014a, bi2014b, bi2015a, bi2015b +from .base import CompoundDataset + + +class _base_bi_il(CompoundDataset): + def __init__(self, subjects_list, dataset=None): + code = "Illiteracy" if dataset is None else f"{dataset.code}+IL" + CompoundDataset.__init__( + self, + subjects_list=subjects_list, + events=dict(Target=2, NonTarget=1), + code=code, + interval=[0, 1.0], + paradigm="p300", + ) + + +class bi2014a_il(_base_bi_il): + """A selection of subject from bi2014a with AUC < 0.7 with pipeline: + ERPCovariances(estimator="lwf"), MDM(metric="riemann") + """ + + def __init__(self): + dataset = bi2014a() + subjects_list = [ + (dataset, 4, None, None), + (dataset, 7, None, None), + (dataset, 33, None, None), + (dataset, 34, None, None), + (dataset, 36, None, None), + (dataset, 38, None, None), + (dataset, 42, None, None), + (dataset, 45, None, None), + (dataset, 46, None, None), + (dataset, 47, None, None), + (dataset, 48, None, None), + (dataset, 50, None, None), + (dataset, 51, None, None), + (dataset, 52, None, None), + (dataset, 53, None, None), + 
(dataset, 55, None, None), + (dataset, 61, None, None), + ] + _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) + + +class bi2014b_il(_base_bi_il): + """A selection of subject from bi2014b with AUC < 0.7 with pipeline: + ERPCovariances(estimator="lwf"), MDM(metric="riemann") + """ + + def __init__(self): + dataset = bi2014b() + subjects_list = [ + (dataset, 2, None, None), + (dataset, 7, None, None), + (dataset, 10, None, None), + (dataset, 13, None, None), + (dataset, 14, None, None), + (dataset, 17, None, None), + (dataset, 23, None, None), + (dataset, 26, None, None), + (dataset, 33, None, None), + (dataset, 35, None, None), + (dataset, 36, None, None), + ] + _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) + + +class bi2015a_il(_base_bi_il): + """A selection of subject from bi2015a with AUC < 0.7 with pipeline: + ERPCovariances(estimator="lwf"), MDM(metric="riemann") + """ + + def __init__(self): + dataset = bi2015a() + subjects_list = [ + (dataset, 1, ["session_1", "session_2", "session_3"], None), + (dataset, 39, ["session_2", "session_3"], None), + ] + _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) + + +class bi2015b_il(_base_bi_il): + """A selection of subject from bi2015b with AUC < 0.7 with pipeline: + ERPCovariances(estimator="lwf"), MDM(metric="riemann") + """ + + def __init__(self): + dataset = bi2015b() + subjects_list = [ + (dataset, 2, None, None), + (dataset, 4, None, None), + (dataset, 6, None, None), + (dataset, 8, None, None), + (dataset, 10, None, None), + (dataset, 12, None, None), + (dataset, 14, None, None), + (dataset, 16, None, None), + (dataset, 18, None, None), + (dataset, 20, None, None), + (dataset, 22, None, None), + (dataset, 24, None, None), + (dataset, 26, None, None), + (dataset, 28, None, None), + (dataset, 30, None, None), + (dataset, 32, None, None), + (dataset, 33, None, None), + (dataset, 34, None, None), + (dataset, 35, None, None), + (dataset, 36, None, None), + (dataset, 38, None, None), + (dataset, 40, None, None), + (dataset, 41, None, None), + (dataset, 42, None, None), + (dataset, 44, None, None), + ] + _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) + + +class VirtualReality_il(_base_bi_il): + """A selection of subject from VirtualReality with AUC < 0.7 with pipeline: + ERPCovariances(estimator="lwf"), MDM(metric="riemann") + """ + + def __init__(self): + dataset = VirtualReality(virtual_reality=True, screen_display=True) + subjects_list = [ + (dataset, 4, None, None), + (dataset, 10, None, None), + (dataset, 13, "VR", None), + (dataset, 15, "VR", None), + ] + _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) + + +class biIlliteracy(_base_bi_il): + """Subjects from braininvaders datasets with AUC < 0.7 with pipeline: + ERPCovariances(estimator="lwf"), MDM(metric="riemann") + """ + + def __init__(self): + subjects_list = [ + bi2014a_il(), + bi2014b_il(), + bi2015a_il(), + bi2015b_il(), + VirtualReality_il(), + ] + _base_bi_il.__init__(self, subjects_list=subjects_list) diff --git a/moabb/datasets/fake.py b/moabb/datasets/fake.py index ba24930bd..93a4243dd 100644 --- a/moabb/datasets/fake.py +++ b/moabb/datasets/fake.py @@ -5,6 +5,7 @@ from moabb.datasets.base import BaseDataset from moabb.datasets.braininvaders import VirtualReality +from moabb.datasets.utils import block_rep class FakeDataset(BaseDataset): @@ -112,7 +113,7 @@ def _get_single_subject_data(self, subject): for block in range(self.n_blocks): for repetition in 
range(self.n_repetitions): data[f"{session}"][ - f"block_{block}-repetition_{repetition}" + block_rep(block, repetition) ] = self._generate_raw() return data diff --git a/moabb/datasets/utils.py b/moabb/datasets/utils.py index f839268bf..8d004642a 100644 --- a/moabb/datasets/utils.py +++ b/moabb/datasets/utils.py @@ -9,9 +9,16 @@ dataset_list = [] -for ds in inspect.getmembers(db, inspect.isclass): - if issubclass(ds[1], BaseDataset): - dataset_list.append(ds[1]) + + +def _init_dataset_list(): + for ds in inspect.getmembers(db, inspect.isclass): + print("ds", ds) + if issubclass(ds[1], BaseDataset): + dataset_list.append(ds[1]) + + +_init_dataset_list() def dataset_search( # noqa: C901 @@ -51,6 +58,9 @@ def dataset_search( # noqa: C901 channels: list of str list or set of channels """ + if len(dataset_list) == 0: + _init_dataset_list() + channels = set(channels) out_data = [] if events is not None and has_all_events: @@ -150,3 +160,11 @@ def _download_all(update_path=True, verbose=None): for ds in dataset_list: # call download ds().download(update_path=True, verbose=verbose, accept=True) + + +def block_rep(block: int, rep: int): + return f"block_{block}-repetition_{rep}" + + +def blocks_reps(blocks: list, reps: list): + return [block_rep(b, r) for b in blocks for r in reps] diff --git a/moabb/paradigms/resting_state.py b/moabb/paradigms/resting_state.py index f41ab4c67..52eb40c31 100644 --- a/moabb/paradigms/resting_state.py +++ b/moabb/paradigms/resting_state.py @@ -9,6 +9,8 @@ is a resting state experiment. """ +from scipy.signal import welch + from moabb.paradigms.p300 import SinglePass @@ -76,6 +78,12 @@ def is_valid(self, dataset): return ret + def psd(self, subject, dataset): + # power spectrum density for ease of use + X, y, _ = self.get_data(dataset, [subject]) + f, S = welch(X, axis=-1, nperseg=1024, fs=self.resample) + return (f, S, X, y) + @property def scoring(self): return "roc_auc" diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py index daa66b40d..4143619ee 100644 --- a/moabb/tests/datasets.py +++ b/moabb/tests/datasets.py @@ -3,7 +3,9 @@ import mne from moabb.datasets import Shin2017A, Shin2017B, VirtualReality +from moabb.datasets.compound_dataset import CompoundDataset from moabb.datasets.fake import FakeDataset, FakeVirtualRealityDataset +from moabb.datasets.utils import block_rep from moabb.paradigms import P300 @@ -94,4 +96,100 @@ def test_get_block_repetition(self): repetition = 4 _, _, ret = ds.get_block_repetition(P300(), [subject], [block], [repetition]) assert ret.subject.unique()[0] == subject - assert ret.run.unique()[0] == f"block_{block}-repetition_{repetition}" + assert ret.run.unique()[0] == block_rep(block, repetition) + + +class Test_CompoundDataset(unittest.TestCase): + def __init__(self, *args, **kwargs): + self.paradigm = "p300" + self.n_sessions = 2 + self.n_subjects = 2 + self.n_runs = 2 + self.ds = FakeDataset( + n_sessions=self.n_sessions, + n_runs=self.n_runs, + n_subjects=self.n_subjects, + event_list=["Target", "NonTarget"], + paradigm=self.paradigm, + ) + super().__init__(*args, **kwargs) + + def test_fake_dataset(self): + """this test will insure the basedataset works""" + param_list = [(None, None), ("session_0", "run_0"), (["session_0"], ["run_0"])] + for sessions, runs in param_list: + with self.subTest(): + subjects_list = [(self.ds, 1, sessions, runs)] + compound_data = CompoundDataset( + subjects_list, + events=dict(Target=2, NonTarget=1), + code="CompoundTest", + interval=[0, 1], + paradigm=self.paradigm, + ) + + data = 
compound_data.get_data() + + # Check data type + self.assertTrue(isinstance(data, dict)) + self.assertEqual(type(data[1]["session_0"]["run_0"]), mne.io.RawArray) + + # Check data size + self.assertEqual(len(data), 1) + expected_session_number = self.n_sessions if sessions is None else 1 + self.assertEqual(len(data[1]), expected_session_number) + expected_runs_number = self.n_runs if runs is None else 1 + self.assertEqual(len(data[1]["session_0"]), expected_runs_number) + + # bad subject id must raise error + self.assertRaises(ValueError, compound_data.get_data, [1000]) + + def test_compound_dataset_composition(self): + # Test we can compound two instance of CompoundDataset into a new one. + + # Create an instance of CompoundDataset with one subject + subjects_list = [(self.ds, 1, None, None)] + compound_dataset = CompoundDataset( + subjects_list, + events=dict(Target=2, NonTarget=1), + code="D1", + interval=[0, 1], + paradigm=self.paradigm, + ) + + # Add it two time to a subjects_list + subjects_list = [compound_dataset, compound_dataset] + compound_data = CompoundDataset( + subjects_list, + events=dict(Target=2, NonTarget=1), + code="CompoundTest", + interval=[0, 1], + paradigm=self.paradigm, + ) + + # Assert that the coumpouned dataset has two times more subject than the original one. + data = compound_data.get_data() + self.assertEqual(len(data), 2) + + def test_get_sessions_per_subject(self): + # define a new fake dataset with two times more sessions: + self.ds2 = FakeDataset( + n_sessions=self.n_sessions * 2, + n_runs=self.n_runs, + n_subjects=self.n_subjects, + event_list=["Target", "NonTarget"], + paradigm=self.paradigm, + ) + + # Add the two datasets to a CompoundDataset + subjects_list = [(self.ds, 1, None, None), (self.ds2, 1, None, None)] + compound_dataset = CompoundDataset( + subjects_list, + events=dict(Target=2, NonTarget=1), + code="CompoundTest", + interval=[0, 1], + paradigm=self.paradigm, + ) + + # Test private method _get_sessions_per_subject returns the minimum number of sessions per subjects + self.assertEqual(compound_dataset._get_sessions_per_subject(), self.n_sessions) diff --git a/tutorials/tutorial_5_build_a_custom_dataset.py b/tutorials/tutorial_5_build_a_custom_dataset.py new file mode 100644 index 000000000..b7ef6b946 --- /dev/null +++ b/tutorials/tutorial_5_build_a_custom_dataset.py @@ -0,0 +1,126 @@ +""" +==================================== +Tutorial 5: Creating a dataset class +==================================== +""" +# Author: Gregoire Cattan +# +# https://github.com/plcrodrigues/Workshop-MOABB-BCI-Graz-2019 + +from pyriemann.classification import MDM +from pyriemann.estimation import ERPCovariances +from sklearn.pipeline import make_pipeline + +from moabb.datasets import VirtualReality +from moabb.datasets.braininvaders import bi2014a +from moabb.datasets.compound_dataset import CompoundDataset +from moabb.datasets.utils import blocks_reps +from moabb.evaluations import WithinSessionEvaluation +from moabb.paradigms.p300 import P300 + + +############################################################################## +# Initialization +# ------------------ +# +# This tutorial illustrates how to use the CompoundDataset to: +# 1) Select a few subjects/sessions/runs in an existing dataset +# 2) Merge two CompoundDataset into a new one +# 3) ... and finally use this new dataset on a pipeline +# (this steps is not specific to CompoundDataset) +# +# Let's define a paradigm and a pipeline for evaluation first. 
+ +paradigm = P300() +pipelines = {} +pipelines["MDM"] = make_pipeline(ERPCovariances(estimator="lwf"), MDM(metric="riemann")) + +############################################################################## +# Creation a selection of subject +# ------------------ +# +# We are going to great two CompoundDataset, namely CustomDataset1 & 2. +# A CompoundDataset accepts a subjects_list of subjects. +# It is a list of tuple. A tuple contains 4 values: +# - the original dataset +# - the subject number to select +# - the sessions. It can be: +# - a session name ('session_0') +# - a list of sessions (['session_0', 'session_1']) +# - `None` to select all the sessions attributed to a subjet +# - the runs. As for sessions, it can be a single run name, a list or `None`` (to select all runs). + + +class CustomDataset1(CompoundDataset): + def __init__(self): + biVR = VirtualReality(virtual_reality=True, screen_display=True) + runs = blocks_reps([1, 3], [1, 2, 3, 4, 5]) + subjects_list = [ + (biVR, 1, "VR", runs), + (biVR, 2, "VR", runs), + ] + CompoundDataset.__init__( + self, + subjects_list=subjects_list, + events=dict(Target=2, NonTarget=1), + code="D1", + interval=[0, 1.0], + paradigm="p300", + ) + + +class CustomDataset2(CompoundDataset): + def __init__(self): + bi2014 = bi2014a() + subjects_list = [ + (bi2014, 4, None, None), + (bi2014, 7, None, None), + ] + CompoundDataset.__init__( + self, + subjects_list=subjects_list, + events=dict(Target=2, NonTarget=1), + code="D2", + interval=[0, 1.0], + paradigm="p300", + ) + + +############################################################################## +# Merging the datasets +# ------------------ +# +# We are now going to merge the two CompoundDataset into a single one. +# The implementation is straigh forward. Instead of providing a list of subjects, +# you should provide a list of CompoundDataset. +# subjects_list = [CustomDataset1(), CustomDataset2()] + + +class CustomDataset3(CompoundDataset): + def __init__(self): + subjects_list = [CustomDataset1(), CustomDataset2()] + CompoundDataset.__init__( + self, + subjects_list=subjects_list, + events=dict(Target=2, NonTarget=1), + code="D3", + interval=[0, 1.0], + paradigm="p300", + ) + + +############################################################################## +# Evaluate and display +# ------------------ +# +# Let's use a WithinSessionEvaluation to evaluate our new dataset. +# If you already new how to do this, nothing changed: +# The CompoundDataset can be used as a `normal` dataset. + +datasets = [CustomDataset3()] +evaluation = WithinSessionEvaluation( + paradigm=paradigm, datasets=datasets, overwrite=False, suffix="newdataset" +) +scores = evaluation.process(pipelines) + +print(scores) From a9f2e4ca3abc02e5fb81d36da06760f11653e550 Mon Sep 17 00:00:00 2001 From: Eli Simhayev Date: Mon, 10 Jul 2023 00:59:13 +0300 Subject: [PATCH 19/64] Update README.md: removed space to fix hyperlink (#418) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 60f653b59..46b61873c 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ the wiki: https://github.com/NeuroTechX/moabb/wiki/Datasets-Support you can submit a new dataset by mentioning it to this [issue](https://github.com/NeuroTechX/moabb/issues/1). The datasets currently on our radar -can be seen [here] (https://github.com/NeuroTechX/moabb/wiki/Datasets-Support) +can be seen [here](https://github.com/NeuroTechX/moabb/wiki/Datasets-Support). ## Who are we? 
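For readers skimming the patch series: below is a minimal, self-contained sketch (not taken verbatim from any patch above) of how the run-naming helpers ``block_rep``/``blocks_reps`` added to ``moabb/datasets/utils.py`` combine with the new ``CompoundDataset``. The dataset, subject, session and ``code`` values are illustrative assumptions mirroring ``tutorials/tutorial_5_build_a_custom_dataset.py``.

from moabb.datasets import VirtualReality
from moabb.datasets.compound_dataset import CompoundDataset
from moabb.datasets.utils import block_rep, blocks_reps

# block_rep builds a single run label; blocks_reps the product blocks x repetitions.
block_rep(2, 4)  # -> "block_2-repetition_4"
runs = blocks_reps([1, 3], [1, 2])
# -> ["block_1-repetition_1", "block_1-repetition_2",
#     "block_3-repetition_1", "block_3-repetition_2"]

# A CompoundDataset takes (dataset, subject, sessions, runs) tuples; here subject 1
# of the "VR" session is restricted to the runs generated above (values are
# illustrative only, following the tutorial added in patch 18).
biVR = VirtualReality(virtual_reality=True, screen_display=True)
selection = CompoundDataset(
    subjects_list=[(biVR, 1, "VR", runs)],
    events=dict(Target=2, NonTarget=1),
    code="CustomSelection",
    interval=[0, 1.0],
    paradigm="p300",
)
data = selection.get_data()  # dict: subject -> session -> run -> mne Raw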
From b50a12d6083dd33e10c53418dfe51bde2da401e6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 17:56:09 +0100 Subject: [PATCH 20/64] [pre-commit.ci] pre-commit autoupdate (#415) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-prettier: v3.0.0-alpha.6 → v3.0.0-alpha.9-for-vscode](https://github.com/pre-commit/mirrors-prettier/compare/v3.0.0-alpha.6...v3.0.0-alpha.9-for-vscode) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Bru --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e058c12e4..32c4bcf8c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: additional_dependencies: [flake8-bugbear] - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.0-alpha.6 + rev: v3.0.0-alpha.9-for-vscode hooks: - id: prettier args: [--print-width=90, --prose-wrap=always] From 4fa6a3c376d773349afeeedb8c059719d6bdf73c Mon Sep 17 00:00:00 2001 From: Sara Sedlar Date: Tue, 11 Jul 2023 14:18:08 +0200 Subject: [PATCH 21/64] Multithread mkdir (#421) * Add exist_ok=True flag in mkdir * Updating the whats_new.rst --------- Co-authored-by: Sara Sedlar Co-authored-by: bruAristimunha --- docs/source/whats_new.rst | 2 +- moabb/datasets/download.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 43274be37..0cc97c7ed 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -40,7 +40,7 @@ Bugs - Fixing SSLError from BCI competition IV (:gh:`404` by `Bruno Aristimunha`_) - Fixing :func:`moabb.dataset.bnci.MNEBNCI.data_path` that returned the data itself instead of paths (:gh:`412` by `Pierre Guetschel`_) - Adding :func:`moabb.datasets.fake` in the init file to use in braindecode object (:gh:`414` by `Bruno Aristimunha`_) - +- Fixing the parallel download issue when the dataset have the same directory (:gh:`421` by `Sara Sedlar`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/download.py b/moabb/datasets/download.py index 1ecdfadc1..ceb182c56 100644 --- a/moabb/datasets/download.py +++ b/moabb/datasets/download.py @@ -147,8 +147,7 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None): if not destination.is_file() or force_update: if destination.is_file(): destination.unlink() - if not destination.parent.is_dir(): - destination.parent.mkdir(parents=True) + destination.parent.mkdir(parents=True, exist_ok=True) known_hash = None else: known_hash = file_hash(str(destination)) From 37f3618bec31930295d625bb0fa6a16f0ee9afeb Mon Sep 17 00:00:00 2001 From: Sara Sedlar Date: Tue, 11 Jul 2023 17:11:46 +0200 Subject: [PATCH 22/64] P300 annotations (#396) * Change extraction of events from annotations if there is no stim_channel * Update event dictionary and class docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks * Add mapping of old annotation description to 'Target'/'NonTarget' * [pre-commit.ci] auto fixes from pre-commit.com hooks * Formating * [pre-commit.ci] auto fixes from pre-commit.com hooks * Remove dict default class argument as it is a mutable data structure * Updating the whats_new.rst --------- Co-authored-by: Sara Sedlar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 
Co-authored-by: Bru --- docs/source/whats_new.rst | 1 + moabb/datasets/huebner_llp.py | 10 +++++++++- moabb/datasets/sosulski2019.py | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 0cc97c7ed..0bf8d73d4 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -41,6 +41,7 @@ Bugs - Fixing :func:`moabb.dataset.bnci.MNEBNCI.data_path` that returned the data itself instead of paths (:gh:`412` by `Pierre Guetschel`_) - Adding :func:`moabb.datasets.fake` in the init file to use in braindecode object (:gh:`414` by `Bruno Aristimunha`_) - Fixing the parallel download issue when the dataset have the same directory (:gh:`421` by `Sara Sedlar`_) +- Fixing warning with annotation in the p300 datasets (:gh:`421` by `Sara Sedlar`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/huebner_llp.py b/moabb/datasets/huebner_llp.py index 60f5e369c..ad7e36119 100644 --- a/moabb/datasets/huebner_llp.py +++ b/moabb/datasets/huebner_llp.py @@ -18,7 +18,12 @@ class _BaseVisualMatrixSpellerDataset(BaseDataset, ABC): def __init__( - self, src_url, n_subjects, raw_slice_offset, use_blocks_as_sessions=True, **kwargs + self, + src_url, + n_subjects, + raw_slice_offset, + use_blocks_as_sessions=True, + **kwargs, ): self.n_channels = 31 # all channels except 5 times x_* CH and EOGvu if kwargs["interval"] is None: @@ -35,6 +40,7 @@ def __init__( self.raw_slice_offset = 2_000 if raw_slice_offset is None else raw_slice_offset self._src_url = src_url self.use_blocks_as_sessions = use_blocks_as_sessions + self.description_map = {"Stimulus/S 1": "Target", "Stimulus/S 0": "NonTarget"} @staticmethod def _filename_trial_info_extraction(vhdr_file_path): @@ -69,6 +75,8 @@ def _get_single_subject_data(self, subject): verbose=None, ) + raw_bvr_list[0].annotations.rename(self.description_map) + if self.use_blocks_as_sessions: session_name = f"{session_name}_block_{block_idx}" else: diff --git a/moabb/datasets/sosulski2019.py b/moabb/datasets/sosulski2019.py index fdb300958..812f96c83 100644 --- a/moabb/datasets/sosulski2019.py +++ b/moabb/datasets/sosulski2019.py @@ -104,6 +104,7 @@ def __init__( self.stimulus_modality = "tone_oddball" self.n_channels = 31 self.use_soas_as_sessions = use_soas_as_sessions + self.description_map = {"Stimulus/S 21": "Target", "Stimulus/S 1": "NonTarget"} code = "Spot Pilot P300 dataset" interval = [-0.2, 1] if interval is None else interval super().__init__( @@ -141,6 +142,7 @@ def _get_single_run_data(self, file_path): raw.set_montage("standard_1020") if self.reject_non_iid: raw.set_annotations(raw.annotations[7:85]) # non-iid rejection + raw.annotations.rename(self.description_map) return raw def _get_single_subject_data(self, subject): From 84d7a0f6a6b32c80d3648cb894f9517c19fbcc5f Mon Sep 17 00:00:00 2001 From: Bru Date: Tue, 11 Jul 2023 22:57:11 +0200 Subject: [PATCH 23/64] Updating the whats_new.rst and fixing the dataset list (#423) --- docs/source/whats_new.rst | 3 ++- moabb/datasets/utils.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 0bf8d73d4..6c4abbbca 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -41,7 +41,8 @@ Bugs - Fixing :func:`moabb.dataset.bnci.MNEBNCI.data_path` that returned the data itself instead of paths (:gh:`412` by `Pierre Guetschel`_) - Adding :func:`moabb.datasets.fake` in the init file to use in braindecode object (:gh:`414` by `Bruno Aristimunha`_) - Fixing the 
parallel download issue when the dataset have the same directory (:gh:`421` by `Sara Sedlar`_) -- Fixing warning with annotation in the p300 datasets (:gh:`421` by `Sara Sedlar`_) +- Fixing fixes the problem with the annotation loading for the P300 datasets Sosulski2019, Huebner2017 and Huebner2018 (:gh:`396` by `Sara Sedlar`_) +- Removing the print in the dataset list (:gh:`423` by `Bruno Aristimunha`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/utils.py b/moabb/datasets/utils.py index 8d004642a..7d3e26902 100644 --- a/moabb/datasets/utils.py +++ b/moabb/datasets/utils.py @@ -13,7 +13,6 @@ def _init_dataset_list(): for ds in inspect.getmembers(db, inspect.isclass): - print("ds", ds) if issubclass(ds[1], BaseDataset): dataset_list.append(ds[1]) From 69c357cee4c59487f36cd7ba571856e42caf04ce Mon Sep 17 00:00:00 2001 From: Bru Date: Wed, 12 Jul 2023 11:02:25 +0200 Subject: [PATCH 24/64] speeding the augmentation method (#420) Co-authored-by: Igor Carrara <94047258+carraraig@users.noreply.github.com> --- docs/source/whats_new.rst | 1 + moabb/pipelines/features.py | 33 +++++++++++++++------------------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 6c4abbbca..d28ba7f02 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -28,6 +28,7 @@ Enhancements - Adding saving option for the models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) - Adding example to load different type of models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) - Add resting state paradigm with dataset and example (:gh:`400` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) +- Speeding the augmentation method by 400% with NumPy vectorization (:gh:`419` by `Bruno Aristimunha`_) Bugs ~~~~ diff --git a/moabb/pipelines/features.py b/moabb/pipelines/features.py index 329485904..4bfa0a004 100644 --- a/moabb/pipelines/features.py +++ b/moabb/pipelines/features.py @@ -1,6 +1,7 @@ import mne import numpy as np import scipy.signal as signal +from numpy import concatenate, ndarray from sklearn.base import BaseEstimator, TransformerMixin from sklearn.preprocessing import StandardScaler @@ -76,31 +77,27 @@ class AugmentedDataset(BaseEstimator, TransformerMixin): https://doi.org/10.48550/arXiv.2302.04508 """ - def __init__(self, order=1, lag=1): + def __init__(self, order: int = 1, lag: int = 1): self.order = order self.lag = lag - def fit(self, X, y): + def fit(self, X: ndarray, y: ndarray): return self - def transform(self, X): + def transform(self, X: ndarray): if self.order == 1: - X_fin = X + X_fin: ndarray = X else: - X_fin = [] - - for i in np.arange(X.shape[0]): - X_p = X[i][:, : -self.order * self.lag] - X_p = np.concatenate( - [X_p] - + [ - X[i][:, p * self.lag : -(self.order - p) * self.lag] - for p in range(1, self.order) - ], - axis=0, - ) - X_fin.append(X_p) - X_fin = np.array(X_fin) + X_p = X[:, :, : -self.order * self.lag] + X_p = concatenate( + [X_p] + + [ + X[:, :, p * self.lag : -(self.order - p) * self.lag] + for p in range(1, self.order) + ], + axis=1, + ) + X_fin = X_p return X_fin From 65a520129ff778a3c48c921c266de9576ee9fbaa Mon Sep 17 00:00:00 2001 From: Kaos9001 Date: Thu, 20 Jul 2023 17:07:28 -0300 Subject: [PATCH 25/64] Fixed bug in BraindecodeDatasetLoader where incorrect y was used in transform calls (#426) * Fixed bug in BraindecodeDatasetLoader where the transform method would incorrectly always discard its optional y argument in favor of self.y instead of using the cached self.y only 
when y is None * Updating the whats_new.rst and adding testing --------- Co-authored-by: bruAristimunha --- docs/source/whats_new.rst | 4 ++++ moabb/pipelines/utils_pytorch.py | 4 +++- moabb/tests/util_braindecode.py | 40 +++++++++++++++++++++++++++++--- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index d28ba7f02..2a69bffb8 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -44,6 +44,9 @@ Bugs - Fixing the parallel download issue when the dataset have the same directory (:gh:`421` by `Sara Sedlar`_) - Fixing fixes the problem with the annotation loading for the P300 datasets Sosulski2019, Huebner2017 and Huebner2018 (:gh:`396` by `Sara Sedlar`_) - Removing the print in the dataset list (:gh:`423` by `Bruno Aristimunha`_) +- Fixing bug in :func:`moabb.pipeline.utils_pytorch.BraindecodeDatasetLoader` where incorrect y was used in transform calls (:gh:`426` by `Gabriel Schwartz`_) +- Fixing one test in :func:`moabb.pipeline.utils_pytorch.BraindecodeDatasetLoader` (:gh:`426` by `Bruno Aristimunha`_) + API changes ~~~~~~~~~~~ @@ -331,6 +334,7 @@ Bugs API changes ~~~~~~~~~~~ - None +.. _Gabriel Schwartz: https://github.com/Kaos9001 .. _Sara Sedlar: https://github.com/Sara04 .. _Emmanuel Kalunga: https://github.com/emmanuelkalunga .. _Gregoire Cattan: https://github.com/gcattan diff --git a/moabb/pipelines/utils_pytorch.py b/moabb/pipelines/utils_pytorch.py index b91a71b30..382f1b283 100644 --- a/moabb/pipelines/utils_pytorch.py +++ b/moabb/pipelines/utils_pytorch.py @@ -48,9 +48,11 @@ def fit(self, X, y=None): def transform(self, X, y=None): _check_data_format(X) + if y is None: + y = self.y dataset = create_from_X_y( X=X.get_data(), - y=self.y, + y=y, window_size_samples=X.get_data().shape[2], window_stride_samples=X.get_data().shape[2], drop_last_window=self.drop_last_window, diff --git a/moabb/tests/util_braindecode.py b/moabb/tests/util_braindecode.py index 679b79f0f..bc1c036fa 100644 --- a/moabb/tests/util_braindecode.py +++ b/moabb/tests/util_braindecode.py @@ -106,9 +106,7 @@ def test_type_create_from_X_y_vs_transfomer(self, data): sfreq=X_train.info["sfreq"], ) transformer = BraindecodeDatasetLoader() - dataset_trans = transformer.fit(X=X_train.get_data(), y=y_train).transform( - X_train - ) + dataset_trans = transformer.fit(X=X_train, y=y_train).transform(X_train) assert isinstance(dataset_trans, BaseConcatDataset) assert type(dataset_trans) == type(dataset) @@ -118,6 +116,42 @@ def test_wrong_input(self): with pytest.raises(ValueError): transformer.fit_transform(np.random.normal(size=(2, 1, 10)), y=np.array([0])) + def test_transformer_transform_with_custom_y(self, data): + """Test whether the provided y is used during transform""" + X_train, y_train, _, _ = data + transformer = BraindecodeDatasetLoader() + + # Create test data with different y values + X_test = X_train.copy() + y_test = y_train + 1 + + # Fit the transformer with training data and custom y + transformer.fit(X_train, y_train) + + # Transform the test data with custom y + dataset_test = transformer.transform(X_test, y=y_test) + + # Verify that the transformed dataset contains the test data's x values and the custom y values + assert len(dataset_test) == len(X_test) + assert np.array_equal(dataset_test[0][1], y_test[0]) + assert np.array_equal(dataset_test[1][1], y_test[1]) + + def test_transformer_transform_with_default_y(self, data): + """Test whether self.y is used when y is not provided during transform""" + X_train, 
y_train, _, _ = data + transformer = BraindecodeDatasetLoader() + + # Fit the transformer with training data and default y + transformer.fit(X_train, y_train) + + # Transform the test data without providing y + dataset_test = transformer.transform(X_train) + + # Verify that the transformed dataset contains the training data's x values and the default y values + assert len(dataset_test) == len(X_train) + assert np.array_equal(dataset_test[0][1], y_train[0]) + assert np.array_equal(dataset_test[1][1], y_train[1]) + if __name__ == "__main__": unittest.main() From d18b9e1e4600d42b7130aca438797b6cb521f59b Mon Sep 17 00:00:00 2001 From: Bru Date: Mon, 31 Jul 2023 19:21:22 +0200 Subject: [PATCH 26/64] Improve pre-commit (#435) * Updating the whats_new.rst and fixing the dataset list * Improving the pre-commit * Improving the pre-commit * changing permision file * changing other files permission * changing other files permission * Fixing the yaml files with yamllint * Removing the double quoted string fixer * Ignoring the E501 * Removing autopep8 * Fixing typos * install pip --- .pre-commit-config.yaml | 68 ++++++++++++++++--- CODE_OF_CONDUCT.md | 0 CONTRIBUTING.md | 0 ROADMAP.md | 0 docs/source/CONTRIBUTING.md | 0 docs/source/install/install_pip.rst | 2 +- docs/source/install/install_source.rst | 4 +- docs/source/whats_new.rst | 5 +- .../plot_filterbank_csp_vs_csp.py | 3 +- .../plot_mne_and_scikit_estimators.py | 1 + examples/changing_download_directory.py | 5 +- examples/example_codecarbon.py | 26 ++++--- .../plot_learning_curve_p300_external.py | 10 ++- .../plot_learning_curve_motor_imagery.py | 4 +- .../plot_learning_curve_p300.py | 2 - examples/load_model.py | 2 - examples/plot_benchmark.py | 2 +- examples/plot_benchmark_grid_search.py | 1 - examples/plot_braindecode.py | 3 - examples/plot_phmd_ml_spectrum.py | 1 - .../plot_vr_pc_p300_different_epoch_size.py | 2 +- moabb/analysis/meta_analysis.py | 4 +- moabb/analysis/results.py | 4 +- moabb/datasets/Lee2019.py | 2 +- moabb/datasets/base.py | 8 +-- moabb/datasets/bbci_eeg_fnirs.py | 4 +- moabb/datasets/bnci.py | 12 ++-- moabb/datasets/braininvaders.py | 2 +- moabb/datasets/download.py | 2 +- moabb/datasets/fake.py | 2 +- moabb/datasets/neiry.py | 6 +- moabb/datasets/physionet_mi.py | 2 +- moabb/datasets/schirrmeister2017.py | 2 +- moabb/datasets/ssvep_mamem.py | 4 +- moabb/evaluations/base.py | 4 +- moabb/pipelines/classification.py | 12 ++-- moabb/pipelines/utils.py | 2 +- moabb/pipelines/utils_pytorch.py | 2 +- moabb/tests/paradigms.py | 8 +-- moabb/tests/test_pipelines/SSVEP_CCA.yml | 2 +- moabb/tests/util_braindecode.py | 2 +- pipelines/CCA-SSVEP.yml | 2 +- pipelines/MsetCCA-SSVEP.yml | 2 +- pipelines/TRCA-SSVEP.yml | 2 +- ...orial_3_benchmarking_multiple_pipelines.py | 2 +- tutorials/tutorial_4_adding_a_dataset.py | 2 +- .../tutorial_5_build_a_custom_dataset.py | 2 +- 47 files changed, 141 insertions(+), 98 deletions(-) mode change 100755 => 100644 CODE_OF_CONDUCT.md mode change 100755 => 100644 CONTRIBUTING.md mode change 100755 => 100644 ROADMAP.md mode change 100755 => 100644 docs/source/CONTRIBUTING.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 32c4bcf8c..97f655314 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,30 +17,80 @@ repos: hooks: - id: check-yaml - id: check-json + - id: check-executables-have-shebangs + - id: check-toml + - id: check-docstring-first - id: check-added-large-files - id: end-of-file-fixer - id: trailing-whitespace - id: check-case-conflict - id: 
mixed-line-ending + - id: end-of-file-fixer + - id: check-case-conflict + - id: forbid-new-submodules + - id: pretty-format-json + args: ["--autofix", "--no-sort-keys", "--indent=4"] + - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.7.0 hooks: - id: black + language_version: python3.8 + args: [--line-length=90, --target-version=py38] + + + - repo: https://github.com/asottile/blacken-docs + rev: 1.15.0 + hooks: + - id: blacken-docs + additional_dependencies: [black==23.3.0] + exclude: ^.github/ - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: - id: isort - - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + - repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + additional_dependencies: [ + 'flake8-blind-except', + 'flake8-docstrings', + 'flake8-bugbear', + 'flake8-comprehensions', + 'flake8-docstrings', + 'flake8-implicit-str-concat', + 'pydocstyle>=5.0.0', + ] + exclude: ^docs/ | ^setup\.py$ | + + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.280 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix, --ignore, E501] + + - repo: https://github.com/codespell-project/codespell + rev: v2.2.5 hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear] + - id: codespell + args: + - --ignore-words-list=additionals,alle,alot,bund,currenty,datas,farenheit,falsy,fo,haa,hass,iif,incomfort,ines,ist,nam,nd,pres,pullrequests,resset,rime,ser,serie,te,technik,ue,unsecure,withing,zar + - --skip="./.*,*.csv,*.json,*.ambr" + - --quiet-level=2 + exclude_types: [csv, json] + exclude: ^tests/|generated/ - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.0-alpha.9-for-vscode + - repo: https://github.com/adrienverge/yamllint.git + rev: v1.32.0 hooks: - - id: prettier - args: [--print-width=90, --prose-wrap=always] + - id: yamllint + exclude: > + (?x)^( + .pre-commit-config.yaml*| + .github/workflows/.*| + + )$ diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md old mode 100755 new mode 100644 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md old mode 100755 new mode 100644 diff --git a/ROADMAP.md b/ROADMAP.md old mode 100755 new mode 100644 diff --git a/docs/source/CONTRIBUTING.md b/docs/source/CONTRIBUTING.md old mode 100755 new mode 100644 diff --git a/docs/source/install/install_pip.rst b/docs/source/install/install_pip.rst index 446a856d2..2029738ed 100644 --- a/docs/source/install/install_pip.rst +++ b/docs/source/install/install_pip.rst @@ -8,7 +8,7 @@ MOABB can be installed via pip from `PyPI `__. .. note:: We recommend the most updated version of pip to install from PyPI. -Bellow are the installation commands for the most common use cases. +Below are the installation commands for the most common use cases. .. code-block:: console diff --git a/docs/source/install/install_source.rst b/docs/source/install/install_source.rst index 400631fc4..29f70ff89 100644 --- a/docs/source/install/install_source.rst +++ b/docs/source/install/install_source.rst @@ -7,7 +7,7 @@ If you want to test features under development or contribute to the library, or .. note:: - If you are only trying to install MOABB, we recommend using the pip installation `Installation `__ for details on that. + If you are only trying to install MOABB, we recommend using the pip installation `Installation `__ for details on that. .. 
_system-level: @@ -56,7 +56,7 @@ If you want to build from source to work on MOABB itself, then follow these step You could also check checkout `poetry installation instruction `__ or use `conda forge version `__ -We need the most updated version of the poetry to ensure the compatiblity with optional dependency. +We need the most updated version of the poetry to ensure the compatibility with optional dependency. .. note:: If you have any group-related errors at the end of this section, you may not run the proper version of poetry. diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 2a69bffb8..47d16f19d 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -29,6 +29,7 @@ Enhancements - Adding example to load different type of models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) - Add resting state paradigm with dataset and example (:gh:`400` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) - Speeding the augmentation method by 400% with NumPy vectorization (:gh:`419` by `Bruno Aristimunha`_) +- Improving the review processing with more pre-commit bots (:gh:`434` by `Bruno Aristimunha`_) Bugs ~~~~ @@ -70,7 +71,7 @@ Enhancements - Add a augmentation model to the pipeline (:gh:`326` by `Igor Carrara`_) - Add BrainDecode example (:gh:`340` by `Igor Carrara`_ and `Bruno Aristimunha`_) - Add Google Analytics to the documentation (:gh:`335` by `Bruno Aristimunha`_) -- Add suport to Braindecode classifier (:gh:`328` by `Bruno Aristimunha`_) +- Add support to Braindecode classifier (:gh:`328` by `Bruno Aristimunha`_) - Add CodeCarbon to track emission CO₂ (:gh:`350` by `Igor Carrara`_, `Bruno Aristimunha`_ and `Sylvain Chevallier`_) - Add CodeCarbon example (:gh:`356` by `Igor Carrara`_ and `Bruno Aristimunha`_) - Add MsetCCA method for SSVEP classification, parametrise CCA `n_components` in CCA based methods (:gh:`359` by `Emmanuel Kalunga`_ and `Sylvain Chevallier`_) @@ -137,7 +138,7 @@ Bugs - Removing dependency on mne method for PhysionetMI data downloading, renaming runs (:gh:`257` by `Divyesh Narayanan`_) - Correcting events management in Schirrmeister2017, renaming session and run (:gh:`255` by `Pierre Guetschel`_ and `Sylvain Chevallier`_) - Switch session and runs in MAMEM1, 2 and 3 to avoid error in WithinSessionEvaluation (:gh:`256` by `Sylvain Chevallier`_) -- Correct doctstrings for the documentation, incuding Lee2017 (:gh:`256` by `Sylvain Chevallier`_) +- Correct doctstrings for the documentation, including Lee2017 (:gh:`256` by `Sylvain Chevallier`_) Version - 0.4.4 diff --git a/examples/advanced_examples/plot_filterbank_csp_vs_csp.py b/examples/advanced_examples/plot_filterbank_csp_vs_csp.py index 149b6bcc5..0e9a2e5f5 100644 --- a/examples/advanced_examples/plot_filterbank_csp_vs_csp.py +++ b/examples/advanced_examples/plot_filterbank_csp_vs_csp.py @@ -31,7 +31,7 @@ # ---------------- # # The CSP implementation from MNE is used. We selected 8 CSP components, as -# usually done in the litterature. +# usually done in the literature. # # The second pipeline is the filter bank CSP. We use the FilterBank object # with a CSP estimator. 
We set up the CSP to 4 components, to compensate for @@ -88,7 +88,6 @@ results = pd.concat([results, results_fb]) - ############################################################################## # Plot Results # ---------------- diff --git a/examples/advanced_examples/plot_mne_and_scikit_estimators.py b/examples/advanced_examples/plot_mne_and_scikit_estimators.py index 5454f83a3..8149eb99a 100644 --- a/examples/advanced_examples/plot_mne_and_scikit_estimators.py +++ b/examples/advanced_examples/plot_mne_and_scikit_estimators.py @@ -76,6 +76,7 @@ dataset=dataset, subjects=subject_list, return_epochs=True ) + ############################################################################## # A Simple MNE Pipeline # --------------------- diff --git a/examples/changing_download_directory.py b/examples/changing_download_directory.py index 224c910c9..095ac2c3e 100644 --- a/examples/changing_download_directory.py +++ b/examples/changing_download_directory.py @@ -3,9 +3,8 @@ Change Download Directory =========================== -This is a minimal example to demonstrate how to change the default data download directory to a custom -path/location. -""" +This is a minimal example to demonstrate how to change the default data +download directory to a custom path/location. """ # Authors: Divyesh Narayanan # # License: BSD (3-clause) diff --git a/examples/example_codecarbon.py b/examples/example_codecarbon.py index 6adfddb13..68286d71b 100644 --- a/examples/example_codecarbon.py +++ b/examples/example_codecarbon.py @@ -40,14 +40,16 @@ # If you want to limit your benchmark on a subset of datasets, you can use the # ``include_datasets`` and ``exclude_datasets`` arguments. You will need either # to provide the dataset's object, or a dataset's code. To get the list of -# available dataset's code for a given paradigm, you can use the following command: +# available dataset's code for a given paradigm, you can use the following +# command: paradigm = LeftRightImagery() for d in paradigm.datasets: print(d.code) ############################################################################### -# In this example, we will use only the last dataset, 'Zhou 2016', considering only the first subject. +# In this example, we will use only the last dataset, 'Zhou 2016', considering +# only the first subject. # # Running the benchmark # --------------------- @@ -55,15 +57,16 @@ # The benchmark is run using the ``benchmark`` function. You need to specify the # folder containing the pipelines to use, the kind of evaluation and the paradigm # to use. By default, the benchmark will use all available datasets for all -# paradigms listed in the pipelines. You could restrict to specific evaluation and -# paradigm using the ``evaluations`` and ``paradigms`` arguments. +# paradigms listed in the pipelines. You could restrict to specific evaluation +# and paradigm using the ``evaluations`` and ``paradigms`` arguments. # # To save computation time, the results are cached. If you want to re-run the # benchmark, you can set the ``overwrite`` argument to ``True``. # -# It is possible to indicate the folder to cache the results and the one to save -# the analysis & figures. By default, the results are saved in the ``results`` -# folder, and the analysis & figures are saved in the ``benchmark`` folder. +# It is possible to indicate the folder to cache the results and the one to +# save the analysis & figures. By default, the results are saved in the +# ``results`` folder, and the analysis & figures are saved in the ``benchmark`` +# folder. 
dataset = Zhou2016() dataset2 = BNCI2014001() @@ -99,16 +102,17 @@ ############################################################################### # Plotting the results # -------------------- -# We can plot the results using the ``codecarbon_plot`` function, generated bellow. This function takes the -# dataframe returned by the ``benchmark`` function as input, and returns a pyplot figure. +# We can plot the results using the ``codecarbon_plot`` function, generated +# below. This function takes the dataframe returned by the ``benchmark`` +# function as input, and returns a pyplot figure. # The ``order_list`` argument is used to specify the order of the pipelines in # the plot. codecarbon_plot(results, order_list, country="(France)") ############################################################################### -# The result expected will be the following image, but varying depending on the machine -# and the country used to run the example. +# The result expected will be the following image, but varying depending on the +# machine and the country used to run the example. # # .. image:: ../images/example_codecarbon.png # :align: center diff --git a/examples/external/plot_learning_curve_p300_external.py b/examples/external/plot_learning_curve_p300_external.py index 22648e294..574b5df24 100644 --- a/examples/external/plot_learning_curve_p300_external.py +++ b/examples/external/plot_learning_curve_p300_external.py @@ -5,8 +5,8 @@ This example shows how to perform a within session analysis while also creating learning curves for a P300 dataset. -Additionally, we will evaluate external code. Make sure to have tdlda installed, which -can be found in requirements_external.txt +Additionally, we will evaluate external code. Make sure to have tdlda installed +, which can be found in requirements_external.txt We will compare three pipelines : @@ -42,7 +42,6 @@ warnings.simplefilter(action="ignore", category=FutureWarning) warnings.simplefilter(action="ignore", category=RuntimeWarning) - moabb.set_log_level("info") ############################################################################## @@ -91,7 +90,6 @@ c.preproc = jmv pipelines["JM+TD-LDA"] = make_pipeline(jmv, c) - ############################################################################## # Evaluation # ---------- @@ -107,7 +105,8 @@ datasets = [dataset] overwrite = True # set to True if we want to overwrite cached results data_size = dict(policy="ratio", value=np.geomspace(0.02, 1, 6)) -# When the training data is sparse, peform more permutations than when we have a lot of data +# When the training data is sparse, perform more permutations than when we have +# a lot of data n_perms = np.floor(np.geomspace(20, 2, len(data_size["value"]))).astype(int) print(n_perms) # Guarantee reproducibility @@ -121,7 +120,6 @@ overwrite=overwrite, ) - results = evaluation.process(pipelines) ############################################################################## diff --git a/examples/learning_curve/plot_learning_curve_motor_imagery.py b/examples/learning_curve/plot_learning_curve_motor_imagery.py index df17ce86b..99db91a6a 100644 --- a/examples/learning_curve/plot_learning_curve_motor_imagery.py +++ b/examples/learning_curve/plot_learning_curve_motor_imagery.py @@ -44,7 +44,7 @@ # Pipelines must be a dict of sklearn pipeline transformer. # # The CSP implementation from MNE is used. We selected 8 CSP components, as -# usually done in the litterature. +# usually done in the literature. 
# # The Riemannian geometry pipeline consists in covariance estimation, tangent # space mapping and finally a logistic regression for the classification. @@ -78,7 +78,7 @@ overwrite = True # set to True if we want to overwrite cached results # Evaluate for a specific number of training samples per class data_size = dict(policy="per_class", value=np.array([5, 10, 30, 50])) -# When the training data is sparse, peform more permutations than when we have a lot of data +# When the training data is sparse, perform more permutations than when we have a lot of data n_perms = np.floor(np.geomspace(20, 2, len(data_size["value"]))).astype(int) evaluation = WithinSessionEvaluation( paradigm=paradigm, diff --git a/examples/learning_curve/plot_learning_curve_p300.py b/examples/learning_curve/plot_learning_curve_p300.py index 7055b779b..f17aa5ed7 100644 --- a/examples/learning_curve/plot_learning_curve_p300.py +++ b/examples/learning_curve/plot_learning_curve_p300.py @@ -41,7 +41,6 @@ warnings.simplefilter(action="ignore", category=FutureWarning) warnings.simplefilter(action="ignore", category=RuntimeWarning) - moabb.set_log_level("info") ############################################################################## @@ -97,7 +96,6 @@ overwrite=overwrite, ) - results = evaluation.process(pipelines) ############################################################################## diff --git a/examples/load_model.py b/examples/load_model.py index 1a7b1c32c..836f54483 100644 --- a/examples/load_model.py +++ b/examples/load_model.py @@ -67,7 +67,6 @@ ] ) - ############################################################################### # Loading the PyTorch model @@ -115,7 +114,6 @@ clf.load_params(f_params=f_params, f_optimizer=f_optimizer, f_history=f_history) - # Create the dataset create_dataset = BraindecodeDatasetLoader(drop_last_window=False) diff --git a/examples/plot_benchmark.py b/examples/plot_benchmark.py index dc0095635..227a890e6 100644 --- a/examples/plot_benchmark.py +++ b/examples/plot_benchmark.py @@ -29,7 +29,7 @@ # simple format. It simplifies sharing and reusing pipelines across benchmarks, # reproducing state-of-the-art results. # -# MOABB comes with complete list of pipelines that cover most of the sucessful +# MOABB comes with complete list of pipelines that cover most of the successful # approaches in the literature. You can find them in the # `pipelines folder `_. # For this example, we will use a folder with only 2 pipelines, to keep the diff --git a/examples/plot_benchmark_grid_search.py b/examples/plot_benchmark_grid_search.py index 0f1f2500a..6d13537dd 100644 --- a/examples/plot_benchmark_grid_search.py +++ b/examples/plot_benchmark_grid_search.py @@ -21,7 +21,6 @@ set_log_level("info") - ############################################################################### # In this example, we will use only the dataset 'Zhou 2016'. # diff --git a/examples/plot_braindecode.py b/examples/plot_braindecode.py index f52089be8..4f3b0b75a 100644 --- a/examples/plot_braindecode.py +++ b/examples/plot_braindecode.py @@ -37,7 +37,6 @@ device = "cuda" if cuda else "cpu" print("GPU is", "AVAILABLE" if cuda else "NOT AVAILABLE") - ############################################################################### # In this example, we will use only the dataset ``BNCI2014001``. 
# @@ -63,7 +62,6 @@ torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False - # Hyperparameter LEARNING_RATE = 0.0625 * 0.01 # parameter taken from Braindecode WEIGHT_DECAY = 0 # parameter taken from Braindecode @@ -131,7 +129,6 @@ pipes = {} pipes["EEGNetV4"] = Pipeline([("Braindecode_dataset", create_dataset), ("Net", clf)]) - ############################################################################## # Evaluation # ---------- diff --git a/examples/plot_phmd_ml_spectrum.py b/examples/plot_phmd_ml_spectrum.py index aee76c249..4c42a9df4 100644 --- a/examples/plot_phmd_ml_spectrum.py +++ b/examples/plot_phmd_ml_spectrum.py @@ -42,7 +42,6 @@ events = ["on", "off"] paradigm = RestingStateToP300Adapter(events=events, channels=[channel]) - ############################################################################### # Estimate Power Spectral Density # --------------- diff --git a/examples/plot_vr_pc_p300_different_epoch_size.py b/examples/plot_vr_pc_p300_different_epoch_size.py index 236f2bd84..5b2822e66 100644 --- a/examples/plot_vr_pc_p300_different_epoch_size.py +++ b/examples/plot_vr_pc_p300_different_epoch_size.py @@ -112,7 +112,7 @@ paradigm, [subject], blocks[test_idx], repetitions ) - # We use riemannian geometry processing technics with MDM algorithm. + # We use riemannian geometry processing techniques with MDM algorithm. pipe = make_pipeline(ERPCovariances(estimator="lwf"), MDM()) pipe.fit(X_train, y_train) y_pred = pipe.predict(X_test) diff --git a/moabb/analysis/meta_analysis.py b/moabb/analysis/meta_analysis.py index 531f761d6..574d58061 100644 --- a/moabb/analysis/meta_analysis.py +++ b/moabb/analysis/meta_analysis.py @@ -31,7 +31,7 @@ def collapse_session_scores(df): def compute_pvals_wilcoxon(df, order=None): - """Compute Wilcoxon rank-sum test on agregated results + """Compute Wilcoxon rank-sum test on aggregated results Returns kxk matrix of p-values computed via the Wilcoxon rank-sum test, order defines the order of rows and columns @@ -133,7 +133,7 @@ def _pairedttest_random(data, nperms): def compute_pvals_perm(df, order=None): - """Compute permutation test on agregated results + """Compute permutation test on aggregated results Returns kxk matrix of p-values computed via permutation test, order defines the order of rows and columns diff --git a/moabb/analysis/results.py b/moabb/analysis/results.py index a68e42625..0360dc5a1 100644 --- a/moabb/analysis/results.py +++ b/moabb/analysis/results.py @@ -136,7 +136,7 @@ def to_list(res): for name, data_dict in results.items(): digest = get_digest(pipelines[name]) if digest not in f.keys(): - # create pipeline main group if nonexistant + # create pipeline main group if nonexistent f.create_group(digest) ppline_grp = f[digest] @@ -148,7 +148,7 @@ def to_list(res): dname = d1["dataset"].code n_add_cols = len(self.additional_columns) if dname not in ppline_grp.keys(): - # create dataset subgroup if nonexistant + # create dataset subgroup if nonexistent dset = ppline_grp.create_group(dname) dset.attrs["n_subj"] = len(d1["dataset"].subject_list) dset.attrs["n_sessions"] = d1["dataset"].n_sessions diff --git a/moabb/datasets/Lee2019.py b/moabb/datasets/Lee2019.py index d4e35fbd2..25c3a469c 100644 --- a/moabb/datasets/Lee2019.py +++ b/moabb/datasets/Lee2019.py @@ -59,7 +59,7 @@ def __init__( raise ValueError('unknown paradigm "{}"'.format(paradigm)) for s in sessions: if s not in [1, 2]: - raise ValueError("inexistant session {}".format(s)) + raise ValueError("inexistent session {}".format(s)) self.sessions 
= sessions super().__init__( diff --git a/moabb/datasets/base.py b/moabb/datasets/base.py index 1b048a8d6..58da45d81 100644 --- a/moabb/datasets/base.py +++ b/moabb/datasets/base.py @@ -77,7 +77,7 @@ def __init__( def get_data(self, subjects=None): """Return the data correspoonding to a list of subjects. - The returned data is a dictionary with the folowing structure:: + The returned data is a dictionary with the following structure:: data = {'subject_id' : {'session_id': @@ -88,7 +88,7 @@ def get_data(self, subjects=None): subjects are on top, then we have sessions, then runs. A sessions is a recording done in a single day, without removing the EEG cap. A session is constitued of at least one run. A run is a single - contigous recording. Some dataset break session in multiple runs. + contiguous recording. Some dataset break session in multiple runs. Parameters ---------- @@ -125,7 +125,7 @@ def download( ): """Download all data from the dataset. - This function is only usefull to download all the dataset at once. + This function is only useful to download all the dataset at once. Parameters @@ -178,7 +178,7 @@ def download( def _get_single_subject_data(self, subject): """Return the data of a single subject. - The returned data is a dictionary with the folowing structure + The returned data is a dictionary with the following structure data = {'session_id': {'run_id': raw} diff --git a/moabb/datasets/bbci_eeg_fnirs.py b/moabb/datasets/bbci_eeg_fnirs.py index 56e771d63..f4e780bf2 100644 --- a/moabb/datasets/bbci_eeg_fnirs.py +++ b/moabb/datasets/bbci_eeg_fnirs.py @@ -34,7 +34,7 @@ def eeg_data_path(base_path, subject, accept): if not accept: raise AttributeError( "You must accept licence term to download this dataset," - "set accept=True when instanciating the dataset." + "set accept=True when instantiating the dataset." ) retrieve( "{}/EEG/EEG_{:02d}-{:02d}.zip".format(SHIN_URL, low, high), @@ -59,7 +59,7 @@ def fnirs_data_path(path, subject, accept): if not accept: raise AttributeError( "You must accept licence term to download this dataset," - "set accept=True when instanciating the dataset." + "set accept=True when instantiating the dataset." ) retrieve( "http://doc.ml.tu-berlin.de/hBCI/NIRS/NIRS_01-29.zip", diff --git a/moabb/datasets/bnci.py b/moabb/datasets/bnci.py index c8b80a402..bdb26587c 100644 --- a/moabb/datasets/bnci.py +++ b/moabb/datasets/bnci.py @@ -68,7 +68,7 @@ def load_data( List of raw instances for each non consecutive recording. Depending on the dataset it could be a BCI run or a different recording session. event_id: dict - dictonary containing events and their code. + dictionary containing events and their code. """ dataset_list = { "001-2014": _load_data_001_2014, @@ -365,7 +365,7 @@ def _load_data_003_2015( # flash events on the channel 9 flashs = run[9:10] ix_flash = flashs[0] > 0 - flashs[0, ix_flash] += 2 # add 2 to avoid overlapp on event id + flashs[0, ix_flash] += 2 # add 2 to avoid overlap on event id flash_code = np.unique(flashs[0, ix_flash]) if len(flash_code) == 36: @@ -849,7 +849,7 @@ class for validation. Participants had the task of performing sustained (5 .. [1] Steyrl, D., Scherer, R., Faller, J. and Müller-Putz, G.R., 2016. Random forests in non-invasive sensorimotor rhythm brain-computer interfaces: a practical and convenient non-linear classifier. - Biomedical Engineering/Biomedizinische Technik, 61(1), pp.77-86. + Biomedical Engineering/Biomedizinische Technique, 61(1), pp.77-86. 
""" @@ -966,7 +966,7 @@ class BNCI2014008(MNEBNCI): This dataset represents a complete record of P300 evoked potentials using a paradigm originally described by Farwell and Donchin [2]_. - In these sessions, 8 users with amyotrophic lateral sclerosis (ALS) + In these sessions, 8 users with amyotrophic lateral sclerosis (ALSO) focused on one out of 36 different characters. The objective in this contest is to predict the correct character in each of the provided character selection epochs. @@ -1037,7 +1037,7 @@ class BNCI2014009(MNEBNCI): This dataset presents a complete record of P300 evoked potentials using two different paradigms: a paradigm based on the P300 Speller in - overt attention condition and a paradigm based used in covert attention + overt attention condition and a paradigm based used in convert attention condition. In these sessions, 10 healthy subjects focused on one out of 36 different characters. The objective was to predict the correct character in each of the provided character selection epochs. @@ -1110,7 +1110,7 @@ class BNCI2015001(MNEBNCI): The task for the user was to perform sustained right hand versus both feet movement imagery starting from the cue (second 3) to the end of the cross - period (sec- ond 8). A trial started with 3 s of reference period, + period (sec- and 8). A trial started with 3 s of reference period, followed by a brisk audible cue and a visual cue (arrow right for right hand, arrow down for both feet) from second 3 to 4.25. The activity period, where the users received feedback, lasted from diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py index d0cbd0189..5fca06a80 100644 --- a/moabb/datasets/braininvaders.py +++ b/moabb/datasets/braininvaders.py @@ -889,7 +889,7 @@ def get_block_repetition(self, paradigm, subjects, block_list, repetition_list): """Select data for all provided subjects, blocks and repetitions. Each subject has 12 blocks of 5 repetitions. - The returned data is a dictionary with the folowing structure:: + The returned data is a dictionary with the following structure:: data = {'subject_id' : {'session_id': diff --git a/moabb/datasets/download.py b/moabb/datasets/download.py index ceb182c56..eb59a4b5a 100644 --- a/moabb/datasets/download.py +++ b/moabb/datasets/download.py @@ -258,7 +258,7 @@ def fs_get_file_id(filelist): Returns ------- response : dict - keys are filname and values are file_id + keys are filename and values are file_id """ return {f["name"]: str(f["id"]) for f in filelist} diff --git a/moabb/datasets/fake.py b/moabb/datasets/fake.py index 93a4243dd..12f220bcb 100644 --- a/moabb/datasets/fake.py +++ b/moabb/datasets/fake.py @@ -121,7 +121,7 @@ def get_block_repetition(self, paradigm, subjects, block_list, repetition_list): """Select data for all provided subjects, blocks and repetitions. Each subject has 5 blocks of 12 repetitions. - The returned data is a dictionary with the folowing structure:: + The returned data is a dictionary with the following structure:: data = {'subject_id' : {'session_id': diff --git a/moabb/datasets/neiry.py b/moabb/datasets/neiry.py index 2319a710b..ed17eacf2 100644 --- a/moabb/datasets/neiry.py +++ b/moabb/datasets/neiry.py @@ -54,8 +54,8 @@ class DemonsP300(BaseDataset): The player was supposed to feed animals and protect them from demons. Game mechanics consisted in demons jumping (visually activating), so player have to concentrate on one demon (chosen freely). That produced - P300 response in time of the deamon jump. 
That was the way to trigger fireball - torwards a deamon predicted by classifier from EEG data. + P300 response in time of the daemon jump. That was the way to trigger fireball + towards a daemon predicted by classifier from EEG data. More info can be found in [1]_ [2]_ [3]_. @@ -66,7 +66,7 @@ class DemonsP300(BaseDataset): Raccoons vs Demons: multiclass labeled P300 dataset, https://arxiv.org/abs/2005.02251 .. [2] Goncharenko V., Grigoryan R., and Samokhina A., - Approaches to multiclass classifcation of P300 potential datasets, + Approaches to multiclass classification of P300 potential datasets, Intelligent Data Processing: Theory and Applications:Book of abstract of the 13th International Conference, Moscow, 2020. — Moscow: Russian Academy of Sciences, 2020. — 472 p.ISBN 978-5-907366-16-9 diff --git a/moabb/datasets/physionet_mi.py b/moabb/datasets/physionet_mi.py index 6ae441287..8afe12c9f 100644 --- a/moabb/datasets/physionet_mi.py +++ b/moabb/datasets/physionet_mi.py @@ -140,7 +140,7 @@ def _get_single_subject_data(self, subject): # feet runs for run in self.feet_runs: raw = self._load_one_run(subject, run) - # modify stim channels to match new event ids. for feets runs, + # modify stim channels to match new event ids. for feet runs, # hand = 2 modified to 4, and feet = 3, modified to 5 stim = raw.annotations.description.astype(np.dtype(" 1: S = S + cov[:n_channels, n_channels:] + cov[n_channels:, :n_channels] @@ -453,7 +453,7 @@ def predict(self, X): return y_pred def predict_proba(self, X): - """Make predictions on unseen data with the asociated probabilities. + """Make predictions on unseen data with the associated probabilities. The new data observation X will be filtered with weights previously extracted and compared to the templates to assess @@ -653,7 +653,7 @@ def predict(self, X): return y def predict_proba(self, X): - """Probabilty could be computed from the correlation coefficient""" + """Probability could be computed from the correlation coefficient""" # Check is fit had been called check_is_fitted(self) diff --git a/moabb/pipelines/utils.py b/moabb/pipelines/utils.py index 8eb90307a..4b15e782a 100644 --- a/moabb/pipelines/utils.py +++ b/moabb/pipelines/utils.py @@ -209,7 +209,7 @@ def generate_param_grid(pipeline_configs, context=None, logger=log): class FilterBank(BaseEstimator, TransformerMixin): - """Apply a given indentical pipeline over a bank of filter. + """Apply a given identical pipeline over a bank of filter. The pipeline provided with the constrictor will be appield on the 4th axis of the input data. This pipeline should be used with a FilterBank diff --git a/moabb/pipelines/utils_pytorch.py b/moabb/pipelines/utils_pytorch.py index 382f1b283..2ab3dfed9 100644 --- a/moabb/pipelines/utils_pytorch.py +++ b/moabb/pipelines/utils_pytorch.py @@ -63,7 +63,7 @@ def transform(self, X, y=None): return dataset def __sklearn_is_fitted__(self): - """Return True since Transfomer is stateless.""" + """Return True since Transformer is stateless.""" return True diff --git a/moabb/tests/paradigms.py b/moabb/tests/paradigms.py index f69f4072f..1c128e231 100644 --- a/moabb/tests/paradigms.py +++ b/moabb/tests/paradigms.py @@ -94,7 +94,7 @@ def test_BaseImagery_filters(self): def test_baseImagery_wrongevent(self): # test process_raw return empty list if raw does not contain any - # selected event. cetain runs in dataset are event specific. + # selected event. certain runs in dataset are event specific. 
paradigm = SimpleMotorImagery(filters=[[7, 12], [12, 24]]) dataset = FakeDataset(paradigm="imagery") raw = dataset.get_data([1])[1]["session_0"]["run_0"] @@ -151,7 +151,7 @@ def test_LeftRightImagery_paradigm(self): self.assertIsInstance(epochs, BaseEpochs) def test_LeftRightImagery_noevent(self): - # we cant pass event to this class + # we can't pass event to this class self.assertRaises(ValueError, LeftRightImagery, events=["a"]) def test_LeftRightImagery_badevents(self): @@ -270,7 +270,7 @@ def test_BaseP300_filters(self): def test_BaseP300_wrongevent(self): # test process_raw return empty list if raw does not contain any - # selected event. cetain runs in dataset are event specific. + # selected event. certain runs in dataset are event specific. paradigm = SimpleP300(filters=[[1, 12], [12, 24]]) dataset = FakeDataset(paradigm="p300", event_list=["Target", "NonTarget"]) raw = dataset.get_data([1])[1]["session_0"]["run_0"] @@ -309,7 +309,7 @@ def test_BaseP300_epochsmetadata(self): self.assertTrue(metadata.equals(epochs.metadata)) def test_P300_specifyevent(self): - # we cant pass event to this class + # we can't pass event to this class self.assertRaises(ValueError, P300, events=["a"]) def test_P300_wrongevent(self): diff --git a/moabb/tests/test_pipelines/SSVEP_CCA.yml b/moabb/tests/test_pipelines/SSVEP_CCA.yml index 5bb5b4ea2..cdf712018 100644 --- a/moabb/tests/test_pipelines/SSVEP_CCA.yml +++ b/moabb/tests/test_pipelines/SSVEP_CCA.yml @@ -9,4 +9,4 @@ pipeline: parameters: n_harmonics: 3 interval: [1, 3] - freqs: { "13": 0, "17": 1 } + freqs: {"13":0, "17":1} diff --git a/moabb/tests/util_braindecode.py b/moabb/tests/util_braindecode.py index bc1c036fa..2d4809708 100644 --- a/moabb/tests/util_braindecode.py +++ b/moabb/tests/util_braindecode.py @@ -94,7 +94,7 @@ def test_assert_raises_value_error(self, data): transformer.fit(X_train, y=y_train, **{invalid_param_name: None}) def test_type_create_from_X_y_vs_transfomer(self, data): - """Test the type from create_from_X_y() and the transfomer""" + """Test the type from create_from_X_y() and the transformer""" X_train, y_train, _, _ = data dataset = create_from_X_y( diff --git a/pipelines/CCA-SSVEP.yml b/pipelines/CCA-SSVEP.yml index ac94c398e..bae157483 100644 --- a/pipelines/CCA-SSVEP.yml +++ b/pipelines/CCA-SSVEP.yml @@ -9,4 +9,4 @@ pipeline: parameters: n_harmonics: 3 interval: [2, 4] - freqs: { "13": 2, "17": 3, "21": 4 } + freqs: {"13": 2, "17":3, "21":4} diff --git a/pipelines/MsetCCA-SSVEP.yml b/pipelines/MsetCCA-SSVEP.yml index 4538fc05c..cc4c8dc45 100644 --- a/pipelines/MsetCCA-SSVEP.yml +++ b/pipelines/MsetCCA-SSVEP.yml @@ -9,4 +9,4 @@ pipeline: - name: SSVEP_MsetCCA from: moabb.pipelines.classification parameters: - freqs: { "13": 2, "17": 3, "21": 4 } + freqs: {"13":2, "17":3, "21":4} diff --git a/pipelines/TRCA-SSVEP.yml b/pipelines/TRCA-SSVEP.yml index 920de8f2d..39a50d35a 100644 --- a/pipelines/TRCA-SSVEP.yml +++ b/pipelines/TRCA-SSVEP.yml @@ -11,4 +11,4 @@ pipeline: parameters: n_fbands: 5 interval: [2, 4] - freqs: { "13": 2, "17": 3, "21": 4 } + freqs: {"13":2, "17":3, "21":4} diff --git a/tutorials/tutorial_3_benchmarking_multiple_pipelines.py b/tutorials/tutorial_3_benchmarking_multiple_pipelines.py index c7eaa1878..6f902e9b3 100644 --- a/tutorials/tutorial_3_benchmarking_multiple_pipelines.py +++ b/tutorials/tutorial_3_benchmarking_multiple_pipelines.py @@ -1,6 +1,6 @@ """ =========================================== -Tutorial 3: Benchmarking mulitple pipelines +Tutorial 3: Benchmarking multiple pipelines 
=========================================== In this last part, we extend the previous example by assessing the diff --git a/tutorials/tutorial_4_adding_a_dataset.py b/tutorials/tutorial_4_adding_a_dataset.py index a065f42bc..31b4da29c 100644 --- a/tutorials/tutorial_4_adding_a_dataset.py +++ b/tutorials/tutorial_4_adding_a_dataset.py @@ -146,7 +146,7 @@ def data_path( # Using the ExampleDataset # ------------------------ # -# Now that the `ExampleDataset` is defined, it could be instanciated directly. +# Now that the `ExampleDataset` is defined, it could be instantiated directly. # The rest of the code follows the steps described in the previous tutorials. dataset = ExampleDataset() diff --git a/tutorials/tutorial_5_build_a_custom_dataset.py b/tutorials/tutorial_5_build_a_custom_dataset.py index b7ef6b946..7e6f8d9a1 100644 --- a/tutorials/tutorial_5_build_a_custom_dataset.py +++ b/tutorials/tutorial_5_build_a_custom_dataset.py @@ -47,7 +47,7 @@ # - the sessions. It can be: # - a session name ('session_0') # - a list of sessions (['session_0', 'session_1']) -# - `None` to select all the sessions attributed to a subjet +# - `None` to select all the sessions attributed to a subject # - the runs. As for sessions, it can be a single run name, a list or `None`` (to select all runs). From 4ab1d3cd58a8d7182c322054e0862f4e4a9a9e43 Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Tue, 1 Aug 2023 11:39:16 +0200 Subject: [PATCH 27/64] Re-structuring the moabb core, implementing caching, creating tutorials, fixing some bugs and more (#408) * Update dependencies * First draft (cant handle multi bands) * Fix process_raw when multiple filters * Reorganise structure * Fix consistency * Change logic * Use BIDSPath.rm * Fix filter missing key padlen * Fix __le__ comparison with None * Fix test type * Fix make dataset description missing directory * Update FakeDataset code to have a unique cache * Create temp dir for FakeDataset * Update test cache dataset * Restore old picks * Fix fakedataset key * Add datetime now to bids * Use dict to pass parameters * Remove duplicate line * Update usage * Use pipeline to process raws * Add verbose param * Fix prepare_dataset * Only filter and keep eeg channels (not stim) and fix corresponding tests * More informative message when save fails * Fix python 3.8 syntax * Fix removeprefix missing in python 3.8 * Fix unexplained bug with resting paradigm * Fixing test_fake_dataset * Fixing test_fake_dataset * Add possibility to also save epochs and arrays * Improve warning * Move preprocessing steps into pipelines * Update BaseParadigm.get_data * Restructure base paradigm, Fixes #425 * get events before RawToEpochs pipeline * Add FixedIntervalWindowsProcessing, Closes #424 * Fix process_raw in P300 special case * Catch empty epochs * Fix tuple type for python 3.8 * Pass events instead of labels with arrays * Add test for fixed interval windows * Fix test of empty pipeline case * Fix stop offset of fixed interval epoching * Add FilterBankFixedIntervalWindowsProcessing * Always overwrite meas date * Add example_fixed_interval_windows.py * Add example_bids_conversion.py * remove separation * Simplify test cache * Improve BIDS log * Fix missing annotations in stim datasets * Update cache test * Add paradigm cache test * Add cache example * Fix TypeError python 3.8 * Fix TypeError python 3.8 * Fix TypeError python 3.8 * Simplify "empty" processing pipeline case * Un-break "empty" processing pipeline case * Comment benchmark tests * Fix 
dataset list overwritten in benchmark * Fix benchmark test * Remove unnecessary test exceptions * Remove unused function * Update whats_new.rst * Clean CacheConfig docstring * Add new paradigms to doc * Fix docs synthax * Rename BIDS example * Rename cache example * Rename interval processing example * Fix example links * Add CacheConfig to doc * Fix doc typos * Fixing typos * Updating the tutorial with my review * Perfect tutorial * small typos and details, nice tutorial * Fixing 79 space * Fixing formation * Adding more comments and docs * fixing E501 * pylinting * fixing line size * pylinter * Fixing docs --------- Co-authored-by: Bru --- docs/source/datasets.rst | 1 + docs/source/paradigms.rst | 25 +- docs/source/whats_new.rst | 21 +- examples/plot_bids_conversion.py | 133 ++++++ examples/plot_disk_cache.py | 263 ++++++++++++ examples/plot_fixed_interval_windows.py | 127 ++++++ moabb/benchmark.py | 7 +- moabb/datasets/base.py | 303 +++++++++++++- moabb/datasets/bids_interface.py | 402 ++++++++++++++++++ moabb/datasets/fake.py | 11 +- moabb/datasets/preprocessing.py | 247 +++++++++++ moabb/datasets/utils.py | 10 +- moabb/paradigms/__init__.py | 1 + moabb/paradigms/base.py | 472 +++++++++++++--------- moabb/paradigms/fixed_interval_windows.py | 264 ++++++++++++ moabb/paradigms/motor_imagery.py | 24 +- moabb/paradigms/p300.py | 192 +-------- moabb/paradigms/ssvep.py | 24 +- moabb/tests/benchmark.py | 16 +- moabb/tests/datasets.py | 81 +++- moabb/tests/paradigms.py | 227 ++++++++++- poetry.lock | 46 ++- pyproject.toml | 4 +- 23 files changed, 2469 insertions(+), 432 deletions(-) create mode 100644 examples/plot_bids_conversion.py create mode 100644 examples/plot_disk_cache.py create mode 100644 examples/plot_fixed_interval_windows.py create mode 100644 moabb/datasets/bids_interface.py create mode 100644 moabb/datasets/preprocessing.py create mode 100644 moabb/paradigms/fixed_interval_windows.py diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index 2d8825689..63dc2455d 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -95,6 +95,7 @@ Base & Utils :template: class.rst base.BaseDataset + base.CacheConfig fake.FakeDataset fake.FakeVirtualRealityDataset diff --git a/docs/source/paradigms.rst b/docs/source/paradigms.rst index 4a129dcca..b66dd4c42 100644 --- a/docs/source/paradigms.rst +++ b/docs/source/paradigms.rst @@ -28,8 +28,8 @@ P300 Paradigms :toctree: generated/ :template: class.rst - SinglePass - P300 + SinglePass + P300 --------------- SSVEP Paradigms @@ -39,8 +39,19 @@ SSVEP Paradigms :toctree: generated/ :template: class.rst - SSVEP - FilterBankSSVEP + SSVEP + FilterBankSSVEP + +-------------- +Fixed Interval Windows Processings +-------------- + +.. 
autosummary:: + :toctree: generated/ + :template: class.rst + + FixedIntervalWindowsProcessing + FilterBankFixedIntervalWindowsProcessing ------------ Base & Utils @@ -53,6 +64,8 @@ Base & Utils motor_imagery.BaseMotorImagery motor_imagery.SinglePass motor_imagery.FilterBank - p300.BaseP300 - ssvep.BaseSSVEP + p300.BaseP300 + ssvep.BaseSSVEP + BaseFixedIntervalWindowsProcessing base.BaseParadigm + base.BaseProcessing diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 47d16f19d..f7fcb0cf5 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -29,25 +29,36 @@ Enhancements - Adding example to load different type of models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) - Add resting state paradigm with dataset and example (:gh:`400` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) - Speeding the augmentation method by 400% with NumPy vectorization (:gh:`419` by `Bruno Aristimunha`_) -- Improving the review processing with more pre-commit bots (:gh:`434` by `Bruno Aristimunha`_) +- Add possibility to convert datasets to BIDS, plus `example `_ (PR :gh:`408`, PR :gh:`391` by `Pierre Guetschel`_ and `Bruno Aristimunha`_) +- Allow caching intermediate processing steps on disk, plus `example `_ (PR :gh:`408`, issue :gh:`385` by `Pierre Guetschel`_) +- Restructure the paradigms and datasets to move all preprocessing steps to :mod:`moabb.datasets.preprocessing` and as sklearn pipelines (PR :gh:`408` by `Pierre Guetschel`_) +- Add :func:`moabb.paradigms.FixedIntervalWindowsProcessing` and :func:`moabb.paradigms.FilterBankFixedIntervalWindowsProcessing`, plus `example `_ (PR :gh:`408`, issue :gh:`424` by `Pierre Guetschel`_) +- Define :func:`moabb.paradigms.base.BaseProcessing`, common parent to :func:`moabb.paradigms.base.BaseParadigm` and :func:`moabb.paradigms.BaseFixedIntervalWindowsProcessing` (PR :gh:`408` by `Pierre Guetschel`_) +- Allow passing a fixed processing pipeline to :func:`moabb.paradigms.base.BaseProcessing.get_data` and cache its result on disk (PR :gh:`408`, issue :gh:`367` by `Pierre Guetschel`_) +- Update :func:`moabb.datasets.fake.FakeDataset`'s code to be unique for each parameter combination (PR :gh:`408` by `Pierre Guetschel`_) +- Systematically set the annotations when loading data, eventually using the stim channel (PR :gh:`408` by `Pierre Guetschel`_) +- Allow :func:`moabb.datasets.utils.dataset_search` to search across paradigms ``paradigm=None`` (PR :gh:`408` by `Pierre Guetschel`_) +- Improving the review processing with more pre-commit bots (:gh:`435` by `Bruno Aristimunha`_) Bugs ~~~~ - Restore 3 subject from Cho2017 (:gh:`392` by `Igor Carrara`_ and `Sylvain Chevallier`_) - Correct downloading with VirtualReality BrainInvaders dataset (:gh:`393` by `Gregoire Cattan`_) -- Rename event `substraction` to `subtraction` in :func:`moabb.dataset.Shin2017B` (:gh:`397` by `Pierre Guetschel`_) -- Save parameters of :func:`moabb.dataset.PhysionetMI` (:gh:`403` by `Pierre Guetschel`_) +- Rename event `substraction` to `subtraction` in :func:`moabb.datasets.Shin2017B` (:gh:`397` by `Pierre Guetschel`_) +- Save parameters of :func:`moabb.datasets.PhysionetMI` (:gh:`403` by `Pierre Guetschel`_) - Fixing issue with parallel evaluation (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) - Fixing SSLError from BCI competition IV (:gh:`404` by `Bruno Aristimunha`_) -- Fixing :func:`moabb.dataset.bnci.MNEBNCI.data_path` that returned the data itself instead of paths (:gh:`412` by `Pierre Guetschel`_) +- Fixing 
:func:`moabb.datasets.bnci.MNEBNCI.data_path` that returned the data itself instead of paths (:gh:`412` by `Pierre Guetschel`_) - Adding :func:`moabb.datasets.fake` in the init file to use in braindecode object (:gh:`414` by `Bruno Aristimunha`_) - Fixing the parallel download issue when the dataset have the same directory (:gh:`421` by `Sara Sedlar`_) - Fixing fixes the problem with the annotation loading for the P300 datasets Sosulski2019, Huebner2017 and Huebner2018 (:gh:`396` by `Sara Sedlar`_) - Removing the print in the dataset list (:gh:`423` by `Bruno Aristimunha`_) - Fixing bug in :func:`moabb.pipeline.utils_pytorch.BraindecodeDatasetLoader` where incorrect y was used in transform calls (:gh:`426` by `Gabriel Schwartz`_) - Fixing one test in :func:`moabb.pipeline.utils_pytorch.BraindecodeDatasetLoader` (:gh:`426` by `Bruno Aristimunha`_) - +- Fix :func:`moabb.benchmark` overwriting ``include_datasets`` list (:gh:`408` by `Pierre Guetschel`_) +- Fix :func:`moabb.paradigms.base.BaseParadigm` using attributes before defining them (PR :gh:`408`, issue :gh:`425` by `Pierre Guetschel`_) +- Fix :func:`moabb.paradigms.FakeImageryParadigm`, :func:`moabb.paradigms.FakeP300Paradigm` and :func:`moabb.paradigms.FakeSSVEPParadigm` ``is_valid`` methods to only accept the correct datasets (PR :gh:`408` by `Pierre Guetschel`_) API changes ~~~~~~~~~~~ diff --git a/examples/plot_bids_conversion.py b/examples/plot_bids_conversion.py new file mode 100644 index 000000000..dce4deb0b --- /dev/null +++ b/examples/plot_bids_conversion.py @@ -0,0 +1,133 @@ +""" +=============================== +Convert a MOABB dataset to BIDS +=============================== + +The Brain Imaging Data Structure (BIDS) format +is standard for storing neuroimaging data. +It follows fixed principles to facilitate the +sharing of neuroimaging data between researchers. + +The MOABB library allows to convert any MOABB dataset to +BIDS [1]_ and [2]_. + +In this example, we will convert the AlexMI dataset to BIDS using the +option `cache_config=dict(path=temp_dir, save_raw=True)` of the get_data +method from the dataset object. + +This will automatically save the raw data in the BIDS format and allow to use +a cache for the next time the dataset is used. + +We will use the AlexMI dataset [3]_, one of the smallest in +people and one that can be downloaded quickly. + +References +----------- + +.. [1] Pernet, C.R., Appelhoff, S., Gorgolewski, K.J. et al. EEG-BIDS, + An extension to the brain imaging data structure for + electroencephalography. Sci Data 6, 103 (2019). + https://doi.org/10.1038/s41597-019-0104-8 + +.. [2] Appelhoff et al., (2019). MNE-BIDS: Organizing electrophysiological + data into the BIDS format and facilitating their analysis. + Journal of Open Source Software, 4(44), 1896, + https://doi.org/10.21105/joss.01896 + +.. [3] Barachant, A., 2012. Commande robuste d'un effecteur par une + interface cerveau machine EEG asynchrone (Doctoral dissertation, + Université de Grenoble). 
+ https://tel.archives-ouvertes.fr/tel-01196752 + +""" +# Authors: Pierre Guetschel +# +# License: BSD (3-clause) + +import shutil +import tempfile +from pathlib import Path + +import mne + +from moabb import set_log_level +from moabb.datasets import AlexMI + + +set_log_level("info") + +############################################################################### +# Basic usage +# ----------- +# +# Here, we will save the BIDS version of the dataset in a temporary folder +temp_dir = Path(tempfile.mkdtemp()) +# The conversion of any MOABB dataset to a BIDS-compliant structure can be done +# by simply calling its ``get_data`` method and using the ``cache_config`` +# parameter. This parameter is a dictionary. +dataset = AlexMI() +_ = dataset.get_data(cache_config=dict(path=temp_dir, save_raw=True)) + + +############################################################################### +# Before / after folder structure +# ----------------------------- +# +# To investigate what was saved, we will first define a function to print +# the folder structure of a given path: +def print_tree(p: Path, last=True, header=""): + elbow = "└──" + pipe = "│ " + tee = "├──" + blank = " " + print(header + (elbow if last else tee) + p.name) + if p.is_dir(): + children = list(p.iterdir()) + for i, c in enumerate(children): + print_tree( + c, header=header + (blank if last else pipe), last=i == len(children) - 1 + ) + + +############################################################################### +# Now, we will retrieve the location of the original dataset. It is stored +# in the MNE data directory, which can be found with the ``"MNE_DATA"`` key: +mne_data = Path(mne.get_config("MNE_DATA")) +print(f"MNE data directory: {mne_data}") + +############################################################################### +# Now, we can print the folder structure of the original dataset: +print("Before conversion:") +print_tree(mne_data / "MNE-alexeeg-data") + +############################################################################### +# As we can see, before conversion, all the data (i.e. from all subjects, +# sessions and runs) is stored in a single folder. This follows no particular +# standard and can vary from one dataset to another. +# +# After conversion, the data is stored in a BIDS-compliant way: +print("After conversion:") +print_tree(temp_dir / "MNE-alexandre motor imagery-bids-cache") + +############################################################################### +# In the BIDS version of our dataset, the raw files are saved in EDF. +# The data is organized in a hierarchy of folders, +# starting with the subjects, then the sessions, and then the runs. Metadata +# files are stored to describe the data. For more details on the BIDS +# structure, please refer to the `BIDS website `_ +# and the `BIDS spec `_. +# +# Under the hood, saving datasets to BIDS is done through the caching system +# of MOABB. Only raw EEG files are officially supported by the BIDS +# specification. +# However, MOABB's caching mechanism also offers the possibility to save +# the data in a pseudo-BIDS after different preprocessing steps. +# In particular, we can save :class:`mne.Epochs` and ``np.ndarray`` objects. +# For more details on the caching system, +# please refer to the tutorial :doc:`./plot_disk_cache`. 
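(Editorial aside, not part of the patch.) To sanity-check the conversion, the cached recordings can be read back with the same MNE-BIDS helpers that the new ``bids_interface`` module relies on. The sketch below assumes the cache root shown in the printed tree above; ``bids_root``, ``matching_paths`` and ``raw_back`` are illustrative names only.

import mne_bids

# Root of the BIDS cache written above (name taken from the printed tree)
bids_root = temp_dir / "MNE-alexandre motor imagery-bids-cache"

# Collect every cached EDF recording, across subjects, sessions and runs
matching_paths = mne_bids.find_matching_paths(
    root=bids_root, datatypes="eeg", extensions=".edf"
)
print(f"Found {len(matching_paths)} cached EDF recordings")

# Load the first recording back as an mne.io.Raw object
raw_back = mne_bids.read_raw_bids(matching_paths[0], verbose=False)
print(raw_back)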
+# +# Cleanup +# ------- +# +# Finally, we can delete the temporary folder: +shutil.rmtree(temp_dir) diff --git a/examples/plot_disk_cache.py b/examples/plot_disk_cache.py new file mode 100644 index 000000000..0529088c0 --- /dev/null +++ b/examples/plot_disk_cache.py @@ -0,0 +1,263 @@ +""" +================================================= +Cache on disk intermediate data processing states +================================================= + +This example shows how intermediate data processing +states can be cached on disk to speed up the loading +of this data in subsequent calls. + +When a MOABB paradigm processes a dataset, it will +first apply processing steps to the raw data, this is +called the ``raw_pipeline``. Then, it will convert the +raw data into epochs and apply processing steps on the +epochs, this is called the ``epochs_pipeline``. +Finally, it will eventually convert the epochs into arrays, +this is called the ``array_pipeline``. In summary: + +``raw_pipeline`` --> ``epochs_pipeline`` --> ``array_pipeline`` + +After each step, MOABB offers the possibility to save on disk +the result of the step. This is done by setting the ``cache_config`` +parameter of the paradigm's ``get_data`` method. +The ``cache_config`` parameter is a dictionary that can take all +the parameters of ``moabb.datasets.base.CacheConfig`` as keys, +they are the following: ``use``, ``save_raw``, ``save_epochs``, +``save_array``, ``overwrite_raw``, ``overwrite_epochs``, +``overwrite_array``, and ``path``. You can also directly pass a +``CacheConfig`` object as ``cache_config``. + +If ``use=False``, the ``save_*`` and ``overwrite_*`` +parameters are ignored. + +When trying to use the cache (i.e. ``use=True``), MOABB will +first check if there exist a cache of the result of the full +pipeline (i.e. ``raw_pipeline`` --> ``epochs_pipeline`` -> +``array_pipeline``). +If there is none, we remove the last step of the pipeline and +look for its cached result. We keep removing steps and looking +for a cached result until we find one or until we reach an +empty pipeline. +Every time, if the ``overwrite_*`` parameter +of the corresponding step is true, we first try to erase the +cache of this step. +Once a cache has been found or the empty pipeline has been reached, +depending on the case we either load the cache or the original dataset. +Then, apply the missing steps one by one and save their result +if their corresponding ``save_*`` parameter is true. + +By default, only the result of the ``raw_pipeline`` is saved. +This is usually a good compromise between speed and disk space +because, when using cached raw data, the epochs can be obtained +without preloading the whole raw signals, only the necessary +intervals. Yet, because only the raw data is cached, the epoching +parameters can be changed without creating a new cache each time. +However, if your epoching parameters are fixed, you can directly +cache the epochs or the arrays to speed up the loading and +reduce the disk space used. + +.. note:: + The ``cache_config`` parameter is also available for the ``get_data`` + method of the datasets. It works the same way as for a + paradigm except that it will save un-processed raw recordings. 
+""" +# Authors: Pierre Guetschel +# +# License: BSD (3-clause) + +import shutil +import tempfile + +############################################################################### +import time +from pathlib import Path + +from moabb import set_log_level +from moabb.datasets import Zhou2016 +from moabb.paradigms import LeftRightImagery + + +set_log_level("info") + +############################################################################### +# Basic usage +# ----------- +# +# The ``cache_config`` parameter is a dictionary that has the +# following default values: +default_cache_config = dict( + save_raw=False, + save_epochs=False, + save_array=False, + use=False, + overwrite_raw=False, + overwrite_epochs=False, + overwrite_array=False, + path=None, +) + +############################################################################### +# You don not need to specify all the keys of ``cache_config``, only the ones +# you want to change. +# +# By default, the cache is saved at the MNE data directory (i.e. when +# ``path=None``). The MNE data directory can be found with +# ``mne.get_config('MNE_DATA')``. For this example, we will save it in a +# temporary directory instead: +temp_dir = Path(tempfile.mkdtemp()) + +############################################################################### +# We will use the Zhou2016 dataset and the LeftRightImagery paradigm in this +# example, but this works for any dataset and paradigm pair.: +dataset = Zhou2016() +paradigm = LeftRightImagery() + +############################################################################### +# And we will only use the first subject for this example: +subjects = [1] + +############################################################################### +# Then, saving a cache can simply be done by setting the desired parameters +# in the ``cache_config`` dictionary: +cache_config = dict( + use=True, + save_raw=True, + save_epochs=True, + save_array=True, + path=temp_dir, +) +_ = paradigm.get_data(dataset, subjects, cache_config=cache_config) + +############################################################################### +# Time comparison +# --------------- +# +# Now, we will compare the time it takes to load the with different levels of +# cache. For this, we will use the cache saved in the previous block and +# overwrite the steps results one by one so that we can compare the time it +# takes to load the data and compute the missing steps with an increasing +# number of missing steps. 
+# +# Using array cache: +cache_config = dict( + use=True, + path=temp_dir, + save_raw=False, + save_epochs=False, + save_array=False, + overwrite_raw=False, + overwrite_epochs=False, + overwrite_array=False, +) +t0 = time.time() +_ = paradigm.get_data(dataset, subjects, cache_config=cache_config) +t_array = time.time() - t0 + +############################################################################### +# Using epochs cache: +cache_config = dict( + use=True, + path=temp_dir, + save_raw=False, + save_epochs=False, + save_array=False, + overwrite_raw=False, + overwrite_epochs=False, + overwrite_array=True, +) +t0 = time.time() +_ = paradigm.get_data(dataset, subjects, cache_config=cache_config) +t_epochs = time.time() - t0 + +############################################################################### +# Using raw cache: +cache_config = dict( + use=True, + path=temp_dir, + save_raw=False, + save_epochs=False, + save_array=False, + overwrite_raw=False, + overwrite_epochs=True, + overwrite_array=True, +) +t0 = time.time() +_ = paradigm.get_data(dataset, subjects, cache_config=cache_config) +t_raw = time.time() - t0 + +############################################################################### +# Using no cache: +cache_config = dict( + use=False, + path=temp_dir, + save_raw=False, + save_epochs=False, + save_array=False, + overwrite_raw=False, + overwrite_epochs=False, + overwrite_array=False, +) +t0 = time.time() +_ = paradigm.get_data(dataset, subjects, cache_config=cache_config) +t_nocache = time.time() - t0 + +############################################################################### +# Time needed to load the data with different levels of cache: +print(f"Using array cache: {t_array:.2f} seconds") +print(f"Using epochs cache: {t_epochs:.2f} seconds") +print(f"Using raw cache: {t_raw:.2f} seconds") +print(f"Without cache: {t_nocache:.2f} seconds") + +############################################################################### +# As you can see, using a raw cache is more than 5 times faster than +# without cache. +# This is because when using the raw cache, the data is not preloaded, only +# the desired epochs are loaded in memory. +# +# Using the epochs cache is a little faster than the raw cache. This is because +# there are several preprocessing steps done after the epoching by the +# ``epochs_pipeline``. This difference would be greater if the ``resample`` +# argument was different that the sampling frequency of the dataset. Indeed, +# the data loading time is directly proportional to its sampling frequency +# and the resampling is done by the ``epochs_pipeline``. +# +# Finally, we observe very little difference between array and epochs cache. +# The main interest of the array cache is when the user passes a +# computationally heavy but fixed additional preprocessing (for example +# computing the covariance matrices of the epochs). This can be done by using +# the ``processing_pipeline`` argument. The output of this additional pipeline +# (necessary a numpy array) will be saved to avoid re-computing it each time. +# +# +# Technical details +# ----------------- +# +# Under the hood, the cache is saved on disk in a Brain Imaging Data Structure +# (BIDS) compliant format. More details on this structure can be found in the +# tutorial :doc:`./plot_bids_conversion`. +# +# However, there are two particular aspects of the way MOABB saves the data +# that are not specific to BIDS: +# +# * For each file, we set a +# `description key `_. 
+# This key is a code that corresponds to a hash of the +# pipeline that was used to generate the data (i.e. from raw to the state +# of the cache). This code is unique for each different pipeline and allows +# to identify all the files that were generated by the same pipeline. +# * Once we finish saving all the files for a given combination of dataset, +# subject, and pipeline, we write a file ending in ``"_lockfile.json"`` at +# the root directory of this subject. This file serves two purposes: +# +# * It indicates that the cache is complete for this subject and pipeline. +# If it is not present, it means that something went wrong during the +# saving process and the cache is incomplete. +# * The file contains the un-hashed string representation of the pipeline. +# Therefore, it can be used to identify the pipeline used without having +# to decode the description key. +# +# Cleanup +# ------- +# +# Finally, we can delete the temporary folder: +shutil.rmtree(temp_dir) diff --git a/examples/plot_fixed_interval_windows.py b/examples/plot_fixed_interval_windows.py new file mode 100644 index 000000000..85e7723e2 --- /dev/null +++ b/examples/plot_fixed_interval_windows.py @@ -0,0 +1,127 @@ +""" +================================= +Fixed interval windows processing +================================= + +This example shows how to process a dataset using the +:class:`moabb.paradigms.FixedIntervalWindowsProcessing` paradigm. This paradigm +creates epochs at fixed intervals, ignoring the stim +channel and events of the datasets. Therefore, it is +compatible with all the datasets. Unfortunately, +this paradigm is not compatible with the MOABB evaluation +framework. However, it can be used to process datasets +for unsupervised algorithms. + +In this example, we will use the Zhou2016 dataset because +it is relatively small and can be downloaded quickly. +""" +# Authors: Pierre Guetschel +# +# License: BSD (3-clause) + +############################################################################### +import matplotlib.pyplot as plt +import mne +import numpy as np + +from moabb import set_log_level +from moabb.datasets import Zhou2016 +from moabb.paradigms import FixedIntervalWindowsProcessing, MotorImagery + + +set_log_level("info") + +############################################################################### +# Process a dataset +# ----------------- +# +# To process a dataset with +# :class:`moabb.paradigms.FixedIntervalWindowsProcessing` , you can use the +# method as with every other paradigm. The only additional parameters are +# ``length``, ``stride``, ``start_offset``, and ``stop_offset``. They are +# all parametrised in seconds. ``length`` is the length of the epochs, +# ``stride`` is the time between the onset of two consecutive epochs, +# ``start_offset`` is the offset between each run start and their first +# epoch, and ``stop_offset`` is the offset between each run start and their +# last epoch. The default values are ``length=5``, ``stride=10``, +# ``start_offset=0``, and ``stop_offset=None`` (i.e. end of the run). 
+# +# An example usage of :class:`moabb.paradigms.FixedIntervalWindowsProcessing` +# with the :class:`moabb.datasets.Zhou2016` dataset: +dataset = Zhou2016() +processing = FixedIntervalWindowsProcessing( + # new parameters: + length=100, + stride=50, + start_offset=300, + stop_offset=900, # we epoch 10 minutes per run, starting at 200 seconds + # parameters common with other paradigms: + resample=100, + fmin=7, + fmax=45, + baseline=None, + channels=None, +) +X, labels, metadata = processing.get_data(dataset=dataset, subjects=[1]) + +############################################################################### +# In this dataset, there are three sessions per subject and two runs per +# session: +for column in metadata.columns: + print(f"{column}s: {metadata[column].unique()}") + +############################################################################### +# We expect to obtained ``(stop_offset - start_offset - length) / stride`` +# = (900-300-100)/50 = 10 epochs per run. Here we have 3*2=6 runs. +# And indeed, we obtain +# a total of 6*10=60 epochs: +print(f"Number of epochs: {len(X)}") + +############################################################################### +# .. note:: +# To apply a bank of bandpass filters, you can use the +# :class:`moabb.paradigms.FilterBankFixedIntervalWindowsProcessing` +# paradigm instead. +# +# Print the events +# --------------------------------- +# +# We can print the position of the created epochs within the run next to +# the original events of the dataset. For this, we will first instantiate +# a :class:`moabb.paradigms.MotorImagery` paradigm to recover the original +# events of the dataset: +paradigm = MotorImagery( + resample=100, + fmin=7, + fmax=45, + baseline=None, + channels=None, +) + +############################################################################### +# Then, we can recover the events of both paradigms using the +# ``_get_events_pipeline`` method: +events_pipeline_dataset = paradigm._get_events_pipeline(dataset) +events_pipeline_fixed = processing._get_events_pipeline(dataset) +raw = dataset.get_data(subjects=[1])[1]["session_0"]["run_0"] +events_dataset = events_pipeline_dataset.transform(raw) +events_fixed = events_pipeline_fixed.transform(raw) +events = np.concatenate([events_dataset, events_fixed]) +event_id = dict(**paradigm.used_events(dataset), **processing.used_events(dataset)) + +############################################################################### +# Finally, we can plot the events. The artificial events created by +# :class:`moabb.paradigms.FixedIntervalWindowsProcessing` are named +# ``"Windows"``: +fig = mne.viz.plot_events( + events, + sfreq=raw.info["sfreq"], + event_id=event_id, +) +fig.subplots_adjust(right=0.7) +plt.show() + +############################################################################### +# We can see that the epochs were effectively created at a fixed interval +# every 50 seconds between 300 and 900 seconds, and ignoring +# the original events of the dataset. 
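(Editorial aside, not part of the patch.) The introduction of this example notes that fixed-interval windows are mainly intended for unsupervised algorithms. A minimal sketch of consuming the returned array ``X`` with scikit-learn could look as follows; the choice of KMeans and of two clusters is purely illustrative.

from sklearn.cluster import KMeans

# X has shape (n_windows, n_channels, n_times); flatten each window to a vector
n_windows, n_channels, n_times = X.shape
X_flat = X.reshape(n_windows, n_channels * n_times)

# Fit an unsupervised model on the windows (no labels are needed)
clusters = KMeans(n_clusters=2, n_init=10, random_state=42).fit_predict(X_flat)
print(f"Windows per cluster: {np.bincount(clusters)}")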
diff --git a/moabb/benchmark.py b/moabb/benchmark.py index 5aae3a505..89fca3657 100644 --- a/moabb/benchmark.py +++ b/moabb/benchmark.py @@ -28,7 +28,6 @@ except ImportError: _carbonfootprint = False - log = logging.getLogger(__name__) @@ -305,14 +304,14 @@ def _inc_exc_datasets(datasets, include_datasets, exclude_datasets): else: # The case where the class instances have been given # can be passed on directly - d = include_datasets + d = list(include_datasets) if exclude_datasets is not None: raise AttributeError( "You could not specify both include and exclude datasets" ) elif exclude_datasets is not None: - d = datasets + d = list(datasets) # Assert if the inputs are not key_codes i.e. expected to be dataset class objects if not isinstance(exclude_datasets[0], str): # Convert the input to key_codes @@ -323,5 +322,5 @@ def _inc_exc_datasets(datasets, include_datasets, exclude_datasets): for excdat in exclude_datasets: del d[datasets_codes.index(excdat)] else: - d = datasets + d = list(datasets) return d diff --git a/moabb/datasets/base.py b/moabb/datasets/base.py index 58da45d81..9eae59410 100644 --- a/moabb/datasets/base.py +++ b/moabb/datasets/base.py @@ -1,16 +1,132 @@ -""" -Base class for a dataset -""" +"""Base class for a dataset.""" import abc import logging +import traceback +from dataclasses import dataclass +from enum import Enum from inspect import signature +from pathlib import Path +from typing import Dict, Type, Union + +from sklearn.pipeline import Pipeline, make_pipeline + +from moabb.datasets.bids_interface import ( + BIDSInterfaceBase, + BIDSInterfaceEpochs, + BIDSInterfaceNumpyArray, + BIDSInterfaceRawEDF, +) +from moabb.datasets.preprocessing import ( + EpochsToEvents, + ForkPipelines, + RawToEvents, + SetRawAnnotations, +) log = logging.getLogger(__name__) +@dataclass +class CacheConfig: + """ + Configuration for caching of datasets. + + Parameters + ---------- + save_*: bool + This flag specifies whether to save the output of the corresponding + step to disk. + use: bool + This flag specifies whether to use the disk cache in case it exists. + If True, the Raw or Epochs objects returned will not be preloaded + (this saves some time). Otherwise, they will be preloaded. + If use is False, the save_* and overwrite_* keys will be ignored. + overwrite_*: bool + This flag specifies whether to overwrite the disk cache in + case it exist. + path : None | str + Location of where to look for the data storing location. + If None, the environment variable or config parameter + ``MNE_DATASETS_(signifier)_PATH`` is used. If it doesn't exist, the + "~/mne_data" directory is used. If the dataset + is not found under the given path, the data + will be automatically downloaded to the specified folder. + verbose: + Verbosity level. See mne.verbose. + """ + + save_raw: bool = False + save_epochs: bool = False + save_array: bool = False + + use: bool = False + + overwrite_raw: bool = False + overwrite_epochs: bool = False + overwrite_array: bool = False + + path: Union[str, Path] = None + verbose: str = None + + @classmethod + def make(cls, dic: Union[None, Dict, "CacheConfig"] = None) -> "CacheConfig": + """ + Create a CacheConfig object from a dict or another CacheConfig object. 
+ + Examples + ------- + Using default parameters: + + >>> CacheConfig.make() + CacheConfig(save=True, use=True, overwrite=True, path=None) + + From a dict: + + >>> dic = {'save': False} + >>> CacheConfig.make(dic) + CacheConfig(save=False, use=True, overwrite=True, path=None) + """ + if dic is None: + return cls() + elif isinstance(dic, dict): + return cls(**dic) + elif isinstance(dic, cls): + return dic + else: + raise ValueError(f"Expected dict or CacheConfig, got {type(dic)}") + + +class StepType(Enum): + """Enum for the different steps in the pipeline.""" + + RAW = "raw" + EPOCHS = "epochs" + ARRAY = "array" + + +_interface_map: Dict[StepType, Type[BIDSInterfaceBase]] = { + StepType.RAW: BIDSInterfaceRawEDF, + StepType.EPOCHS: BIDSInterfaceEpochs, + StepType.ARRAY: BIDSInterfaceNumpyArray, +} + + +def apply_step(pipeline, obj): + """Apply a pipeline to an object.""" + if obj is None: + return None + try: + return pipeline.transform(obj) + except ValueError as error: + # no events received by RawToEpochs: + if str(error) == "No events found": + return None + raise error + + class BaseDataset(metaclass=abc.ABCMeta): - """BaseDataset + """Abstract Moabb BaseDataset. Parameters required for all datasets @@ -60,6 +176,7 @@ def __init__( doi=None, unit_factor=1e6, ): + """Initialize function for the BaseDataset.""" try: _ = iter(subjects) except TypeError: @@ -74,14 +191,22 @@ def __init__( self.doi = doi self.unit_factor = unit_factor - def get_data(self, subjects=None): + def get_data( + self, + subjects=None, + cache_config=None, + raw_pipeline=None, + epochs_pipeline=None, + array_pipeline=None, + events_pipeline=None, + ): """Return the data correspoonding to a list of subjects. The returned data is a dictionary with the following structure:: data = {'subject_id' : {'session_id': - {'run_id': raw} + {'run_id': run} } } @@ -90,10 +215,42 @@ def get_data(self, subjects=None): EEG cap. A session is constitued of at least one run. A run is a single contiguous recording. Some dataset break session in multiple runs. + Processing steps can optionally be applied to the data using the + ``*_pipeline`` arguments. These pipelines are applied in the + following order: ``raw_pipeline`` -> ``epochs_pipeline`` -> + ``array_pipeline``. If a ``*_pipeline`` argument is ``None``, + the step will be skipped. Therefore, the ``array_pipeline`` may + either receive a :class:`mne.io.Raw` or a :class:`mne.Epochs` object + as input depending on whether ``epochs_pipeline`` is ``None`` or not. + Parameters ---------- subjects: List of int List of subject number + cache_config: dict | CacheConfig + Configuration for caching of datasets. See ``CacheConfig`` + for details. + raw_pipeline: sklearn.pipeline.Pipeline | sklearn.base.TransformerMixin + | None + Pipeline that necessarily takes a mne.io.Raw as input, + and necessarily returns a :class:`mne.io.Raw` as output. + epochs_pipeline: sklearn.pipeline.Pipeline | + sklearn.base.TransformerMixin | None + Pipeline that necessarily takes a mne.io.Raw as input, + and necessarily returns a :class:`mne.Epochs` as output. + array_pipeline: sklearn.pipeline.Pipeline | + sklearn.base.TransformerMixin | None + Pipeline either takes as input a :class:`mne.Epochs` if + epochs_pipeline is not ``None``, or a :class:`mne.io.Raw` + otherwise. It necessarily returns a :func:`numpy.ndarray` + as output. + If array_pipeline is not None, each run will be a + dict with keys "X" and "y" corresponding respectively to the array + itself and the corresponding labels. 
+ events_pipeline: sklearn.pipeline.Pipeline | + sklearn.base.TransformerMixin | None + Pipeline used to generate the events. Only used if + ``array_pipeline`` is not ``None``. Returns ------- @@ -104,13 +261,47 @@ def get_data(self, subjects=None): subjects = self.subject_list if not isinstance(subjects, list): - raise (ValueError("subjects must be a list")) + raise ValueError("subjects must be a list") + + if events_pipeline is None and array_pipeline is not None: + log.warning( + f"event_id not specified, using all the dataset's " + f"events to generate labels: {self.event_id}" + ) + events_pipeline = ( + RawToEvents(self.event_id) + if epochs_pipeline is None + else EpochsToEvents() + ) + + cache_config = CacheConfig.make(cache_config) + + steps = [] + steps.append((StepType.RAW, SetRawAnnotations(self.event_id))) + if raw_pipeline is not None: + steps.append((StepType.RAW, raw_pipeline)) + if epochs_pipeline is not None: + steps.append((StepType.EPOCHS, epochs_pipeline)) + if array_pipeline is not None: + array_events_pipeline = ForkPipelines( + [ + ("X", array_pipeline), + ("events", events_pipeline), + ] + ) + steps.append((StepType.ARRAY, array_events_pipeline)) + if len(steps) == 0: + steps.append((StepType.RAW, make_pipeline(None))) data = dict() for subject in subjects: if subject not in self.subject_list: raise ValueError("Invalid subject {:d} given".format(subject)) - data[subject] = self._get_single_subject_data(subject) + data[subject] = self._get_single_subject_data_using_cache( + subject, + cache_config, + steps, + ) return data @@ -174,6 +365,102 @@ def download( verbose=verbose, ) + def _get_single_subject_data_using_cache(self, subject, cache_config, steps): + """Load a single subject's data using cache. + + Either load the data of a single subject from disk cache or from the + dataset object, + then eventually saves or overwrites the cache version depending on the + parameters. + """ + splitted_steps = [] # list of (cached_steps, remaining_steps) + if cache_config.use: + splitted_steps += [ + (steps[:i], steps[i:]) for i in range(len(steps), 0, -1) + ] # [len(steps)...1] + splitted_steps.append( + ([], steps) + ) # last option: if cached_steps is [], we don't use cache, i.e. 
i=0 + + for cached_steps, remaining_steps in splitted_steps: + sessions_data = None + # Load and eventually overwrite: + if len(cached_steps) == 0: # last option: we don't use cache + sessions_data = self._get_single_subject_data(subject) + assert sessions_data is not None # should not happen + else: + cache_type = cached_steps[-1][0] + interface = _interface_map[cache_type]( + self, + subject, + path=cache_config.path, + process_pipeline=Pipeline(cached_steps), + verbose=cache_config.verbose, + ) + + if ( + (cache_config.overwrite_raw and cache_type is StepType.RAW) + or (cache_config.overwrite_epochs and cache_type is StepType.EPOCHS) + or (cache_config.overwrite_array and cache_type is StepType.ARRAY) + ): + interface.erase() + elif cache_config.use: # can't load if it was just erased + sessions_data = interface.load( + preload=False + ) # None if cache inexistent + + # If no cache was found or if it was erased, try the next option: + if sessions_data is None: + continue + + # Apply remaining steps and save: + for step_idx, (step_type, process_pipeline) in enumerate(remaining_steps): + # apply one step: + sessions_data = { + session: { + run: apply_step(process_pipeline, raw) + for run, raw in runs.items() + } + for session, runs in sessions_data.items() + } + + # save: + if ( + ( + cache_config.save_raw + and step_type is StepType.RAW + and ( + (step_idx == len(remaining_steps) - 1) + or (remaining_steps[step_idx + 1][0] is not StepType.RAW) + ) + ) # we only save the last raw step + or (cache_config.save_epochs and step_type is StepType.EPOCHS) + or (cache_config.save_array and step_type is StepType.ARRAY) + ): + interface = _interface_map[step_type]( + self, + subject, + path=cache_config.path, + process_pipeline=Pipeline( + cached_steps + remaining_steps[: step_idx + 1] + ), + verbose=cache_config.verbose, + ) + try: + interface.save(sessions_data) + except Exception: + log.warning( + f"Failed to save {interface.__repr__()} " + f"to BIDS format:\n" + f"{' Pipeline: '.center(50, '#')}\n" + f"{interface.process_pipeline.__repr__()}\n" + f"{' Exception: '.center(50, '#')}\n" + f"{''.join(traceback.format_exc())}{'#' * 50}" + ) + interface.erase() # remove partial cache + return sessions_data + raise ValueError("should not happen") + @abc.abstractmethod def _get_single_subject_data(self, subject): """Return the data of a single subject. diff --git a/moabb/datasets/bids_interface.py b/moabb/datasets/bids_interface.py new file mode 100644 index 000000000..55f2c98dc --- /dev/null +++ b/moabb/datasets/bids_interface.py @@ -0,0 +1,402 @@ +"""BIDS Interface for MOABB. + +======================== + +This module contains the BIDS interface for MOABB, which allows to convert +any MOABB dataset to BIDS with Cache. +We can convert at the Raw, Epochs or Array level. 
+""" + +# Authors: Pierre Guetschel +# +# License: BSD (3-clause) + +import abc +import datetime +import json +import logging +from collections import OrderedDict +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING + +import mne +import mne_bids +from numpy import load as np_load +from numpy import save as np_save + +import moabb +from moabb.analysis.results import get_digest +from moabb.datasets import download as dl + + +if TYPE_CHECKING: + from sklearn.pipeline import Pipeline + + from moabb.datasets.base import BaseDataset + +log = logging.getLogger(__name__) + + +def subject_moabb_to_bids(subject: int): + """Convert the subject number to string (subject).""" + return str(subject) + + +def subject_bids_to_moabb(subject: str): + """Convert the subject string to int(subject).""" + return int(subject) + + +def session_moabb_to_bids(session: str): + """Replace the session_* to *.""" + return session.replace("session_", "") + + +def session_bids_to_moabb(session: str): + """Replace the * to session_*.""" + return "session_" + session + + +# Note: the runs are expected to be indexes in the BIDS standard. +# This is not always the case in MOABB. See: +# bids-specification.readthedocs.io/en/stable/glossary.html#run-entities +def run_moabb_to_bids(run: str): + """Replace the run_* to *.""" + return run.replace("run_", "") + + +def run_bids_to_moabb(run: str): + """Replace the * to run_*.""" + return "run_" + run + + +@dataclass +class BIDSInterfaceBase(abc.ABC): + """Base class for BIDSInterface. + + This dataclass is used to convert a MOABB dataset to MOABB BIDS. + It is used by the ``get_data`` method of any MOABB dataset. + + Parameters + ---------- + dataset : BaseDataset + The dataset to convert. + subject : int + The subject to convert. + path : str + The path to the BIDS dataset. + process_pipeline : Pipeline + The processing pipeline used to convert the data. + verbose : str + The verbosity level. + """ + + dataset: "BaseDataset" + subject: int + path: str = None + process_pipeline: "Pipeline" = None + verbose: str = None + + @property + def processing_params(self): + """Return the processing parameters.""" + # TODO: add dataset kwargs + return self.process_pipeline + + @property + def desc(self): + """Return the description of the processing pipeline.""" + return get_digest(self.processing_params) + + def __repr__(self): + """Return the representation of the BIDSInterface.""" + return ( + f"{self.dataset.code!r} sub-{self.subject} " + f"datatype-{self._datatype} desc-{self.desc:.7}" + ) + + @property + def root(self): + """Return the root path of the BIDS dataset.""" + code = self.dataset.code + "-BIDS" + mne_path = Path(dl.get_dataset_path(code, self.path)) + cache_dir = f"MNE-{code.lower()}-cache" + cache_path = mne_path / cache_dir + + return cache_path + + @property + def lock_file(self): + """Return the lock file path. 
+ + this file was saved last to ensure that the subject's data was + completely saved this is not an official bids file + """ + return mne_bids.BIDSPath( + root=self.root, + subject=subject_moabb_to_bids(self.subject), + description=self.desc, + extension=".json", + suffix="lockfile", # necessary for unofficial files + check=False, + ) + + def erase(self): + """Erase the cache of the subject.""" + log.info("Starting erasing cache of %s...", repr(self)) + path = mne_bids.BIDSPath( + root=self.root, + subject=subject_moabb_to_bids(self.subject), + description=self.desc, + check=False, + ) + path.rm(safe_remove=False) + log.info("Finished erasing cache of %s.", repr(self)) + + def load(self, preload=False): + """Load the cache of the subject.""" + log.info("Attempting to retrieve cache of %s...", repr(self)) + self.lock_file.mkdir(exist_ok=True) + if not self.lock_file.fpath.exists(): + log.info("No cache found at %s.", {str(self.lock_file.directory)}) + return None + paths = mne_bids.find_matching_paths( + root=self.root, + subjects=subject_moabb_to_bids(self.subject), + descriptions=self.desc, + extensions=self._extension, + check=self._check, + datatypes=self._datatype, + suffixes=self._datatype, + ) + sessions_data = {} + for path in paths: + session_moabb = session_bids_to_moabb(path.session) + session = sessions_data.setdefault(session_moabb, {}) + run = self._load_file(path, preload=preload) + session[run_bids_to_moabb(path.run)] = run + log.info("Finished reading cache of %s", {repr(self)}) + return sessions_data + + def save(self, sessions_data): + """Save the cache of the subject.""" + log.info("Starting caching %s", {repr(self)}) + mne_bids.BIDSPath(root=self.root).mkdir(exist_ok=True) + mne_bids.make_dataset_description( + path=str(self.root), + name=self.dataset.code, + dataset_type="derivative", + generated_by=[ + dict( + CodeURL="https://github.com/NeuroTechX/moabb", + Name="moabb", + Description="Mother of All BCI Benchmarks", + Version=moabb.__version__, + ) + ], + source_datasets=[ + dict( + DOI=self.dataset.doi, + ) + ], + overwrite=False, + verbose=self.verbose, + ) + + for session, runs in sessions_data.items(): + for run, obj in runs.items(): + if obj is None: + log.warning( + "Skipping caching %s session " + "%s run %s because " + "it is None.", + (repr(self), session, run), + ) + continue + + bids_path = mne_bids.BIDSPath( + root=self.root, + subject=subject_moabb_to_bids(self.subject), + session=session_moabb_to_bids(session), + task=self.dataset.paradigm, + run=run_moabb_to_bids(run), + description=self.desc, + extension=self._extension, + datatype=self._datatype, + suffix=self._datatype, + check=self._check, + ) + + bids_path.mkdir(exist_ok=True) + self._write_file(bids_path, obj) + log.debug("Writing", self.lock_file) + self.lock_file.mkdir(exist_ok=True) + with self.lock_file.fpath.open("w") as file: + dic = dict(processing_params=str(self.processing_params)) + json.dump(dic, file) + log.info("Finished caching %s to disk.", repr(self)) + + @abc.abstractmethod + def _load_file(self, bids_path, preload): + pass + + @abc.abstractmethod + def _write_file(self, bids_path, obj): + pass + + @property + @abc.abstractmethod + def _extension(self): + pass + + @property + @abc.abstractmethod + def _check(self): + pass + + @property + @abc.abstractmethod + def _datatype(self): + pass + + +class BIDSInterfaceRawEDF(BIDSInterfaceBase): + """BIDS Interface for Raw EDF files. 
Selected .edf type only.""" + + @property + def _extension(self): + return ".edf" + + @property + def _check(self): + return True + + @property + def _datatype(self): + return "eeg" + + def _load_file(self, bids_path, preload): + raw = mne_bids.read_raw_bids( + bids_path, extra_params=dict(preload=preload), verbose=self.verbose + ) + return raw + + def _write_file(self, bids_path, raw): + if not raw.annotations: + raise ValueError( + "Raw object must have annotations to be saved in BIDS format." + "Use the SetRawAnnotations pipeline for this." + ) + datetime_now = datetime.datetime.now(tz=datetime.timezone.utc) + if raw.info.get("line_freq", None) is None: + # specify line frequency if not present as required by BIDS + raw.info["line_freq"] = 50 + if raw.info.get("subject_info", None) is None: + # specify subject info as required by BIDS + raw.info["subject_info"] = { + "his_id": self.subject, + } + if raw.info.get("device_info", None) is None: + # specify device info as required by BIDS + raw.info["device_info"] = {"type": "eeg"} + raw.set_meas_date(datetime_now) + + # Otherwise, the montage would still have the stim channel + # which is dropped by mne_bids.write_raw_bids: + picks = mne.pick_types(info=raw.info, eeg=True, stim=False) + raw.pick(picks) + + # By using the same anonymization `daysback` number we can + # preserve the longitudinal structure of multiple sessions for a + # single subject and the relation between subjects. Be sure to + # change or delete this number before putting code online, you + # wouldn't want to inadvertently de-anonymize your data. + # + # Note that we do not need to pass any events, as the dataset + # is already equipped with annotations, which will be converted to + # BIDS events automatically. + mne_bids.write_raw_bids( + raw, + bids_path, + format="EDF", + allow_preload=True, + montage=raw.get_montage(), + overwrite=False, + verbose=self.verbose, + ) + + +class BIDSInterfaceEpochs(BIDSInterfaceBase): + """This interface is used to cache mne-epochs to disk. + + Pseudo-BIDS format is used to store the data. + """ + + @property + def _extension(self): + return ".fif" + + @property + def _check(self): + return False + + @property + def _datatype(self): + # because of mne conventions, we need the suffix to be "epo" + # because of mne_bids conventions, we need datatype and suffix to match + return "epo" + + def _load_file(self, bids_path, preload): + epochs = mne.read_epochs(bids_path.fpath, preload=preload, verbose=self.verbose) + return epochs + + def _write_file(self, bids_path, epochs): + epochs.save(bids_path.fpath, overwrite=False, verbose=self.verbose) + + +class BIDSInterfaceNumpyArray(BIDSInterfaceBase): + """This interface is used to cache numpy arrays to disk. + + MOABB Pseudo-BIDS format is used to store the data. 
+ """ + + @property + def _extension(self): + return ".npy" + + @property + def _check(self): + return False + + @property + def _datatype(self): + return "array" + + def _load_file(self, bids_path, preload): + if preload: + raise ValueError("preload must be False for numpy arrays") + events_fname = mne_bids.write._find_matching_sidecar( + bids_path, + suffix="events", + extension=".eve", # mne convention + on_error="raise", + ) + log.debug("Reading %s", bids_path.fpath) + X = np_load(bids_path.fpath) + events = mne.read_events(events_fname, verbose=self.verbose) + return OrderedDict([("X", X), ("events", events)]) + + def _write_file(self, bids_path, obj): + events_path = bids_path.copy().update( + suffix="events", + extension=".eve", + ) + log.debug("Writing %s", bids_path.fpath) + np_save(bids_path.fpath, obj["X"]) + log.debug("Wrote %s", bids_path.fpath) + mne.write_events( + filename=events_path.fpath, + events=obj["events"], + overwrite=False, + verbose=self.verbose, + ) diff --git a/moabb/datasets/fake.py b/moabb/datasets/fake.py index 12f220bcb..95923ca02 100644 --- a/moabb/datasets/fake.py +++ b/moabb/datasets/fake.py @@ -1,5 +1,8 @@ +import tempfile +from pathlib import Path + import numpy as np -from mne import create_info +from mne import create_info, get_config, set_config from mne.channels import make_standard_montage from mne.io import RawArray @@ -44,6 +47,7 @@ def __init__( self.n_runs = n_runs event_id = {ev: ii + 1 for ii, ev in enumerate(event_list)} self.channels = channels + code = f"{code}_{paradigm}_{n_subjects}_{n_sessions}_{n_runs}__{'_'.join(event_list)}__{'_'.join(channels)}" super().__init__( subjects=list(range(1, n_subjects + 1)), sessions_per_subject=n_sessions, @@ -52,6 +56,11 @@ def __init__( interval=[0, 3], paradigm=paradigm, ) + key = "MNE_DATASETS_{:s}-BIDS_PATH".format(self.code.upper()) + temp_dir = get_config(key) + if temp_dir is None or not Path(temp_dir).is_dir(): + temp_dir = tempfile.mkdtemp() + set_config(key, temp_dir) def _get_single_subject_data(self, subject): data = dict() diff --git a/moabb/datasets/preprocessing.py b/moabb/datasets/preprocessing.py new file mode 100644 index 000000000..643964510 --- /dev/null +++ b/moabb/datasets/preprocessing.py @@ -0,0 +1,247 @@ +import logging +from collections import OrderedDict +from operator import methodcaller +from typing import Dict, List, Tuple, Union + +import mne +import numpy as np +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.pipeline import FunctionTransformer, Pipeline + + +log = logging.getLogger(__name__) + + +def _is_none_pipeline(pipeline): + """Check if a pipeline is the result of make_pipeline(None)""" + return ( + isinstance(pipeline, Pipeline) + and pipeline.steps[0][1] is None + and len(pipeline) == 1 + ) + + +class ForkPipelines(TransformerMixin, BaseEstimator): + def __init__(self, transformers: List[Tuple[str, Union[Pipeline, TransformerMixin]]]): + for _, t in transformers: + assert hasattr(t, "transform") + self.transformers = transformers + + def transform(self, X, y=None): + return OrderedDict([(n, t.transform(X)) for n, t in self.transformers]) + + def fit(self, X, y=None): + for _, t in self.transformers: + t.fit(X) + + +class FixedTransformer(TransformerMixin, BaseEstimator): + def fit(self, X, y=None): + pass + + +class SetRawAnnotations(FixedTransformer): + def __init__(self, event_id): + assert isinstance(event_id, dict) # not None + self.event_id = event_id + if len(set(event_id.values())) != len(event_id): + raise ValueError("Duplicate 
event code") + self.event_desc = dict((code, desc) for desc, code in self.event_id.items()) + + def transform(self, raw, y=None): + if raw.annotations: + return raw + stim_channels = mne.utils._get_stim_channel(None, raw.info, raise_error=False) + if len(stim_channels) == 0: + raise ValueError("Need either a stim channel or annotations") + events = mne.find_events(raw, shortest_event=0, verbose=False) + # we don't catch the error if no event found: + events = mne.pick_events(events, include=list(self.event_id.values())) + annotations = mne.annotations_from_events( + events, + raw.info["sfreq"], + self.event_desc, + first_samp=raw.first_samp, + verbose=False, + ) + raw.set_annotations(annotations) + return raw + + +class RawToEvents(FixedTransformer): + def __init__(self, event_id): + assert isinstance(event_id, dict) # not None + self.event_id = event_id + + def transform(self, raw, y=None): + stim_channels = mne.utils._get_stim_channel(None, raw.info, raise_error=False) + if len(stim_channels) > 0: + events = mne.find_events(raw, shortest_event=0, verbose=False) + else: + events, _ = mne.events_from_annotations( + raw, event_id=self.event_id, verbose=False + ) + try: + events = mne.pick_events(events, include=list(self.event_id.values())) + except RuntimeError: + # skip raw if no event found + return + return events + + +class RawToEventsP300(FixedTransformer): + def __init__(self, event_id): + assert isinstance(event_id, dict) # not None + self.event_id = event_id + + def transform(self, raw, y=None): + event_id = self.event_id + stim_channels = mne.utils._get_stim_channel(None, raw.info, raise_error=False) + if len(stim_channels) > 0: + events = mne.find_events(raw, shortest_event=0, verbose=False) + else: + events, _ = mne.events_from_annotations(raw, event_id=event_id, verbose=False) + try: + if "Target" in event_id and "NonTarget" in event_id: + if ( + type(event_id["Target"]) is list + and type(event_id["NonTarget"]) == list + ): + event_id_new = dict(Target=1, NonTarget=0) + events = mne.merge_events(events, event_id["Target"], 1) + events = mne.merge_events(events, event_id["NonTarget"], 0) + event_id = event_id_new + events = mne.pick_events(events, include=list(event_id.values())) + except RuntimeError: + # skip raw if no event found + return + return events + + +class RawToFixedIntervalEvents(FixedTransformer): + def __init__( + self, + length, + stride, + start_offset, + stop_offset, + marker=1, + ): + self.length = length + self.stride = stride + self.start_offset = start_offset + self.stop_offset = stop_offset + self.marker = marker + + def transform(self, raw: mne.io.BaseRaw, y=None): + if not isinstance(raw, mne.io.BaseRaw): + raise ValueError + sfreq = raw.info["sfreq"] + length_samples = int(self.length * sfreq) + stride_samples = int(self.stride * sfreq) + start_offset_samples = int(self.start_offset * sfreq) + stop_offset_samples = ( + raw.n_times if self.stop_offset is None else int(self.stop_offset * sfreq) + ) + stop_samples = stop_offset_samples - length_samples + raw.first_samp + onset = np.arange( + raw.first_samp + start_offset_samples, + stop_samples, + stride_samples, + ) + if len(onset) == 0: + # skip raw if no event found + return + events = np.empty((len(onset), 3), dtype=int) + events[:, 0] = onset + events[:, 1] = length_samples + events[:, 2] = self.marker + return events + + +class EpochsToEvents(FixedTransformer): + def transform(self, epochs, y=None): + return epochs.events + + +class EventsToLabels(FixedTransformer): + def __init__(self, event_id): + 
self.event_id = event_id + + def transform(self, events, y=None): + inv_events = {k: v for v, k in self.event_id.items()} + labels = [inv_events[e] for e in events[:, -1]] + return labels + + +class RawToEpochs(FixedTransformer): + def __init__( + self, + event_id: Dict[str, int], + tmin: float, + tmax: float, + baseline: Tuple[float, float], + channels: List[str] = None, + ): + assert isinstance(event_id, dict) # not None + self.event_id = event_id + self.tmin = tmin + self.tmax = tmax + self.baseline = baseline + self.channels = channels + + def transform(self, X, y=None): + raw = X["raw"] + events = X["events"] + if events is None or len(events) == 0: + raise ValueError("No events found") + if not isinstance(raw, mne.io.BaseRaw): + raise ValueError("raw must be a mne.io.BaseRaw") + + if self.channels is None: + picks = mne.pick_types(raw.info, eeg=True, stim=False) + else: + picks = mne.pick_channels( + raw.info["ch_names"], include=self.channels, ordered=True + ) + + epochs = mne.Epochs( + raw, + events, + event_id=self.event_id, + tmin=self.tmin, + tmax=self.tmax, + proj=False, + baseline=self.baseline, + preload=True, + verbose=False, + picks=picks, + event_repeated="drop", + on_missing="ignore", + ) + return epochs + + +def get_filter_pipeline(fmin, fmax): + return FunctionTransformer( + methodcaller( + "filter", + l_freq=fmin, + h_freq=fmax, + method="iir", + picks="eeg", + verbose=False, + ), + ) + + +def get_crop_pipeline(tmin, tmax): + return FunctionTransformer( + methodcaller("crop", tmin=tmax, tmax=tmin, verbose=False), + ) + + +def get_resample_pipeline(sfreq): + return FunctionTransformer( + methodcaller("resample", sfreq=sfreq, verbose=False), + ) diff --git a/moabb/datasets/utils.py b/moabb/datasets/utils.py index 7d3e26902..60224b347 100644 --- a/moabb/datasets/utils.py +++ b/moabb/datasets/utils.py @@ -21,7 +21,7 @@ def _init_dataset_list(): def dataset_search( # noqa: C901 - paradigm, + paradigm=None, multi_session=False, events=None, has_all_events=False, @@ -34,8 +34,8 @@ def dataset_search( # noqa: C901 Parameters ---------- - paradigm: str - 'imagery', 'p300', 'ssvep' + paradigm: str | None + 'imagery', 'p300', 'ssvep', None multi_session: bool if True only returns datasets with more than one session per subject. @@ -66,7 +66,7 @@ def dataset_search( # noqa: C901 n_classes = len(events) else: n_classes = None - assert paradigm in ["imagery", "p300", "ssvep"] + assert paradigm in ["imagery", "p300", "ssvep", None] for type_d in dataset_list: d = type_d() @@ -77,7 +77,7 @@ def dataset_search( # noqa: C901 if len(d.subject_list) < min_subjects: continue - if paradigm != d.paradigm: + if paradigm is not None and paradigm != d.paradigm: continue if interval is not None and d.interval[1] - d.interval[0] < interval: diff --git a/moabb/paradigms/__init__.py b/moabb/paradigms/__init__.py index a1dac30d8..047a59681 100644 --- a/moabb/paradigms/__init__.py +++ b/moabb/paradigms/__init__.py @@ -5,6 +5,7 @@ paradigms; similarly, different preprocessing is necessary for ERP vs ERD paradigms. 
""" +from moabb.paradigms.fixed_interval_windows import * from moabb.paradigms.motor_imagery import * # flake8: noqa diff --git a/moabb/paradigms/base.py b/moabb/paradigms/base.py index 71b7713b4..3fdc1b4f7 100644 --- a/moabb/paradigms/base.py +++ b/moabb/paradigms/base.py @@ -1,38 +1,92 @@ +import abc import logging -from abc import ABCMeta, abstractmethod +from operator import methodcaller +from typing import List, Optional, Tuple import mne import numpy as np import pandas as pd +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import FunctionTransformer + +from moabb.datasets.preprocessing import ( + EpochsToEvents, + EventsToLabels, + ForkPipelines, + RawToEpochs, + RawToEvents, + get_crop_pipeline, + get_filter_pipeline, + get_resample_pipeline, +) log = logging.getLogger(__name__) -class BaseParadigm(metaclass=ABCMeta): - """Base Paradigm.""" +class BaseProcessing(metaclass=abc.ABCMeta): + """Base Processing. - @abstractmethod - def __init__(self): - pass + Please use one of the child classes - @property - @abstractmethod - def scoring(self): - """Property that defines scoring metric (e.g. ROC-AUC or accuracy - or f-score), given as a sklearn-compatible string or a compatible - sklearn scorer. + Parameters + ---------- - """ - pass + filters: list of list (defaults [[7, 35]]) + bank of bandpass filter to apply. + + tmin: float (default 0.0) + Start time (in second) of the epoch, relative to the dataset specific + task interval e.g. tmin = 1 would mean the epoch will start 1 second + after the beginning of the task as defined by the dataset. + + tmax: float | None, (default None) + End time (in second) of the epoch, relative to the beginning of the + dataset specific task interval. tmax = 5 would mean the epoch will end + 5 second after the beginning of the task as defined in the dataset. If + None, use the dataset value. + + baseline: None | tuple of length 2 + The time interval to consider as “baseline†when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + + channels: list of str | None (default None) + list of channel to select. If None, use all EEG channels available in + the dataset. + + resample: float | None (default None) + If not None, resample the eeg data with the sampling rate provided.""" + + def __init__( + self, + filters: List[Tuple[float, float]], + tmin: float = 0.0, + tmax: Optional[float] = None, + baseline: Optional[Tuple[float, float]] = None, + channels: Optional[List[str]] = None, + resample: Optional[float] = None, + ): + if tmax is not None: + if tmin >= tmax: + raise (ValueError("tmax must be greater than tmin")) + self.filters = filters + self.channels = channels + self.baseline = baseline + self.resample = resample + self.tmin = tmin + self.tmax = tmax @property - @abstractmethod + @abc.abstractmethod def datasets(self): """Property that define the list of compatible datasets""" pass - @abstractmethod + @abc.abstractmethod def is_valid(self, dataset): """Verify the dataset is compatible with the paradigm. @@ -69,156 +123,19 @@ def prepare_process(self, dataset): if dataset is not None: pass - def process_raw( # noqa: C901 - self, raw, dataset, return_epochs=False, return_raws=False - ): - """ - Process one raw data file. 
- - This function apply the preprocessing and eventual epoching on the - individual run, and return the data, labels and a dataframe with - metadata. - - metadata is a dataframe with as many row as the length of the data - and labels. - - Parameters - ---------- - raw: mne.Raw instance - the raw EEG data. - dataset : dataset instance - The dataset corresponding to the raw file. mainly use to access - dataset specific information. - return_epochs: boolean - This flag specifies whether to return only the data array or the - complete processed mne.Epochs - return_raws: boolean - To return raw files and events, to ensure compatibility with braindecode. - Mutually exclusive with return_epochs - - returns - ------- - X : Union[np.ndarray, mne.Epochs] - the data that will be used as features for the model - Note: if return_epochs=True, this is mne.Epochs - if return_epochs=False, this is np.ndarray - labels: np.ndarray - the labels for training / evaluating the model - metadata: pd.DataFrame - A dataframe containing the metadata - - """ - - if return_epochs and return_raws: - message = "Select only return_epochs or return_raws, not both" - raise ValueError(message) - - # get events id - event_id = self.used_events(dataset) - - # find the events, first check stim_channels then annotations - stim_channels = mne.utils._get_stim_channel(None, raw.info, raise_error=False) - if len(stim_channels) > 0: - events = mne.find_events(raw, shortest_event=0, verbose=False) - else: - try: - events, _ = mne.events_from_annotations( - raw, event_id=event_id, verbose=False - ) - except ValueError: - log.warning(f"No matching annotations in {raw.filenames}") - return - - # picks channels - if self.channels is None: - picks = mne.pick_types(raw.info, eeg=True, stim=False) - else: - picks = mne.pick_channels( - raw.info["ch_names"], include=self.channels, ordered=True - ) - - # pick events, based on event_id - try: - events = mne.pick_events(events, include=list(event_id.values())) - except RuntimeError: - # skip raw if no event found - return - - if return_raws: - raw = raw.pick(picks) - else: - # get interval - tmin = self.tmin + dataset.interval[0] - if self.tmax is None: - tmax = dataset.interval[1] - else: - tmax = self.tmax + dataset.interval[0] - - X = [] - for bandpass in self.filters: - fmin, fmax = bandpass - # filter data - raw_f = raw.copy().filter( - fmin, fmax, method="iir", picks=picks, verbose=False - ) - # epoch data - baseline = self.baseline - if baseline is not None: - baseline = ( - self.baseline[0] + dataset.interval[0], - self.baseline[1] + dataset.interval[0], - ) - bmin = baseline[0] if baseline[0] < tmin else tmin - bmax = baseline[1] if baseline[1] > tmax else tmax - else: - bmin = tmin - bmax = tmax - epochs = mne.Epochs( - raw_f, - events, - event_id=event_id, - tmin=bmin, - tmax=bmax, - proj=False, - baseline=baseline, - preload=True, - verbose=False, - picks=picks, - event_repeated="drop", - on_missing="ignore", - ) - if bmin < tmin or bmax > tmax: - epochs.crop(tmin=tmin, tmax=tmax) - if self.resample is not None: - epochs = epochs.resample(self.resample) - # rescale to work with uV - if return_epochs: - X.append(epochs) - else: - X.append(dataset.unit_factor * epochs.get_data()) - - # overwrite events in case epochs have been dropped: - # (assuming all filters produce the same number of epochs...) 
- events = epochs.events - - inv_events = {k: v for v, k in event_id.items()} - labels = np.array([inv_events[e] for e in events[:, -1]]) - - if return_epochs: - X = mne.concatenate_epochs(X) - elif return_raws: - X = raw - elif len(self.filters) == 1: - # if only one band, return a 3D array - X = X[0] - else: - # otherwise return a 4D - X = np.array(X).transpose((1, 2, 3, 0)) - - metadata = pd.DataFrame(index=range(len(labels))) - return X, labels, metadata + @abc.abstractmethod + def used_events(self, dataset): + pass - def get_data(self, dataset, subjects=None, return_epochs=False, return_raws=False): + def get_data( # noqa: C901 + self, + dataset, + subjects=None, + return_epochs=False, + return_raws=False, + processing_pipeline=None, + cache_config=None, + ): """ Return the data for a list of subject. @@ -241,6 +158,8 @@ def get_data(self, dataset, subjects=None, return_epochs=False, return_raws=Fals return_raws: boolean To return raw files and events, to ensure compatibility with braindecode. Mutually exclusive with return_epochs + cache_config: dict | CacheConfig + Configuration for caching of datasets. See :class:`moabb.datasets.base.CacheConfig` for details. returns ------- @@ -262,29 +181,100 @@ def get_data(self, dataset, subjects=None, return_epochs=False, return_raws=Fals message = "Select only return_epochs or return_raws, not both" raise ValueError(message) - data = dataset.get_data(subjects) + if subjects is None: + subjects = dataset.subject_list + self.prepare_process(dataset) + raw_pipelines = self._get_raw_pipelines() + epochs_pipeline = self._get_epochs_pipeline(return_epochs, return_raws, dataset) + array_pipeline = self._get_array_pipeline( + return_epochs, return_raws, dataset, processing_pipeline + ) + if return_epochs: + labels_pipeline = make_pipeline( + EpochsToEvents(), + EventsToLabels(event_id=self.used_events(dataset)), + ) + elif return_raws: + labels_pipeline = make_pipeline( + self._get_events_pipeline(dataset), + EventsToLabels(event_id=self.used_events(dataset)), + ) + else: # return array + labels_pipeline = EventsToLabels(event_id=self.used_events(dataset)) - X = [] if (return_epochs or return_raws) else np.array([]) + if array_pipeline is not None: + events_pipeline = ( + self._get_events_pipeline(dataset) if return_raws else EpochsToEvents() + ) + else: + events_pipeline = None + + data = [ + dataset.get_data( + subjects=subjects, + cache_config=cache_config, + raw_pipeline=raw_pipeline, + epochs_pipeline=epochs_pipeline, + array_pipeline=array_pipeline, + events_pipeline=events_pipeline, + ) + for raw_pipeline in raw_pipelines + ] + + X = [] labels = [] metadata = [] - for subject, sessions in data.items(): + for subject, sessions in data[0].items(): for session, runs in sessions.items(): - for run, raw in runs.items(): - proc = self.process_raw(raw, dataset, return_epochs, return_raws) - - if proc is None: + for run in runs.keys(): + proc = [data_i[subject][session][run] for data_i in data] + if any(obj is None for obj in proc): # this mean the run did not contain any selected event # go to next + assert all(obj is None for obj in proc) # sanity check continue - x, lbs, met = proc + if return_epochs: + assert all(len(proc[0]) == len(p) for p in proc[1:]) + n = len(proc[0]) + lbs = labels_pipeline.transform(proc[0]) + x = ( + proc[0] + if len(self.filters) == 1 + else mne.concatenate_epochs(proc) + ) + elif return_raws: + assert all(len(proc[0]) == len(p) for p in proc[1:]) + n = 1 + lbs = labels_pipeline.transform( + proc[0] + ) # XXX does it make 
sense to return labels for raws? + x = proc[0] if len(self.filters) == 1 else proc + else: # return array + assert all( + np.array_equal(proc[0]["X"].shape, p["X"].shape) + for p in proc[1:] + ) + assert all( + np.array_equal(proc[0]["events"], p["events"]) + for p in proc[1:] + ) + n = proc[0]["X"].shape[0] + events = proc[0]["events"] + lbs = labels_pipeline.transform(events) + x = ( + proc[0]["X"] + if len(self.filters) == 1 + else np.array([p["X"] for p in proc]).transpose((1, 2, 3, 0)) + ) + + met = pd.DataFrame(index=range(n)) met["subject"] = subject met["session"] = session met["run"] = run metadata.append(met) - # grow X and labels in a memory efficient way. can be slow if return_epochs: x.metadata = ( met.copy() @@ -293,14 +283,136 @@ def get_data(self, dataset, subjects=None, return_epochs=False, return_raws=Fals [met.copy()] * len(self.filters), ignore_index=True ) ) - X.append(x) - elif return_raws: - X.append(x) - else: - X = np.append(X, x, axis=0) if len(X) else x - labels = np.append(labels, lbs, axis=0) + X.append(x) + labels.append(lbs) metadata = pd.concat(metadata, ignore_index=True) + labels = np.concatenate(labels) if return_epochs: X = mne.concatenate_epochs(X) + elif return_raws: + pass + else: + X = np.concatenate(X, axis=0) return X, labels, metadata + + def _get_raw_pipelines(self): + return [get_filter_pipeline(fmin, fmax) for fmin, fmax in self.filters] + + def _get_epochs_pipeline(self, return_epochs, return_raws, dataset): + if return_raws: + return None + + tmin = self.tmin + dataset.interval[0] + if self.tmax is None: + tmax = dataset.interval[1] + else: + tmax = self.tmax + dataset.interval[0] + + baseline = self.baseline + if baseline is not None: + baseline = ( + self.baseline[0] + dataset.interval[0], + self.baseline[1] + dataset.interval[0], + ) + bmin = baseline[0] if baseline[0] < tmin else tmin + bmax = baseline[1] if baseline[1] > tmax else tmax + else: + bmin = tmin + bmax = tmax + steps = [] + steps.append( + ( + "epoching", + make_pipeline( + ForkPipelines( + [ + ("raw", make_pipeline(None)), + ("events", self._get_events_pipeline(dataset)), + ] + ), + RawToEpochs( + event_id=self.used_events(dataset), + tmin=bmin, + tmax=bmax, + baseline=baseline, + channels=self.channels, + ), + ), + ) + ) + if bmin < tmin or bmax > tmax: + steps.append(("crop", get_crop_pipeline(tmin=tmin, tmax=tmax))) + if self.resample is not None: + steps.append(("resample", get_resample_pipeline(self.resample))) + if return_epochs: # needed to concatenate epochs + steps.append(("load_data", FunctionTransformer(methodcaller("load_data")))) + return Pipeline(steps) + + def _get_array_pipeline( + self, return_epochs, return_raws, dataset, processing_pipeline + ): + steps = [] + if not return_epochs and not return_raws: + steps.append(("get_data", FunctionTransformer(methodcaller("get_data")))) + steps.append( + ( + "scaling", + FunctionTransformer(methodcaller("__mul__", dataset.unit_factor)), + ) + ) + if processing_pipeline is not None: + steps.append(("processing_pipeline", processing_pipeline)) + if len(steps) == 0: + return None + return Pipeline(steps) + + @abc.abstractmethod + def _get_events_pipeline(self, dataset): + pass + + +class BaseParadigm(BaseProcessing): + """Base class for paradigms. + + Parameters + ---------- + + events: List of str | None (default None) + event to use for epoching. If None, default to all events defined in + the dataset. 
+ """ + + def __init__( + self, + filters, + events: Optional[List[str]] = None, + tmin=0.0, + tmax=None, + baseline=None, + channels=None, + resample=None, + ): + super().__init__( + filters=filters, + channels=channels, + baseline=baseline, + resample=resample, + tmin=tmin, + tmax=tmax, + ) + self.events = events + + @property + @abc.abstractmethod + def scoring(self): + """Property that defines scoring metric (e.g. ROC-AUC or accuracy + or f-score), given as a sklearn-compatible string or a compatible + sklearn scorer. + + """ + pass + + def _get_events_pipeline(self, dataset): + event_id = self.used_events(dataset) + return RawToEvents(event_id=event_id) diff --git a/moabb/paradigms/fixed_interval_windows.py b/moabb/paradigms/fixed_interval_windows.py new file mode 100644 index 000000000..f3a0c4eb4 --- /dev/null +++ b/moabb/paradigms/fixed_interval_windows.py @@ -0,0 +1,264 @@ +from moabb.datasets import utils +from moabb.datasets.preprocessing import RawToFixedIntervalEvents +from moabb.paradigms.base import BaseProcessing + + +class BaseFixedIntervalWindowsProcessing(BaseProcessing): + """Base class for fixed interval windows processing. + + Paradigm for creating epochs at fixed interval, + ignoring the stim channel and events of the dataset. + + Parameters + ---------- + + filters: list of list (default [[7, 45]]) + bank of bandpass filter to apply. + + baseline: None | tuple of length 2 + The time interval to consider as “baseline†when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + + channels: list of str | None (default None) + list of channel to select. If None, use all EEG channels available in + the dataset. + + resample: float | None (default None) + If not None, resample the eeg data with the sampling rate provided. + + length: float (default 5.0) + Length of the epochs in seconds. + + stride: float (default 10.0) + Stride between epochs in seconds. + + start_offset: float (default 0.0) + Start from the beginning of the raw recordings in seconds. + + stop_offset: float | None (default None) + Stop offset from beginning of raw recordings in seconds. + If None, set to be the end of the recording. + + marker: int (default -1) + Marker to use for the events created. 
+ """ + + def __init__( + self, + filters=None, + baseline=None, + channels=None, + resample=None, + length: float = 5.0, + stride: float = 10.0, + start_offset=0.0, + stop_offset=None, + marker=-1, + ): + if not filters: + raise ValueError("filters must be specified") + tmin = 0.0 + tmax = length + super().__init__( + filters=filters, + channels=channels, + baseline=baseline, + resample=resample, + tmin=tmin, + tmax=tmax, + ) + self.length = length + self.stride = stride + self.start_offset = start_offset + self.stop_offset = stop_offset + self.marker = marker + + def _to_samples(self, key): + value = getattr(self, key) + if self.resample is None: + raise ValueError(f"{key}_samples: must be specified") + if value is None: + raise ValueError(f"{key}_samples: {key} must be specified") + return int(value * self.resample) + + @property + def length_samples(self): + return self._to_samples("length") + + @property + def stride_samples(self): + return self._to_samples("stride") + + @property + def start_offset_samples(self): + return self._to_samples("start_offset") + + @property + def stop_offset_samples(self): + return self._to_samples("stop_offset") + + def used_events(self, dataset): + return {"Window": self.marker} + + def is_valid(self, dataset): + return True + + @property + def datasets(self): + return utils.dataset_search(paradigm=None) + + def _get_events_pipeline(self, dataset): + return RawToFixedIntervalEvents( + length=self.length, + stride=self.stride, + start_offset=self.start_offset, + stop_offset=self.stop_offset, + marker=self.marker, + ) + + +class FixedIntervalWindowsProcessing(BaseFixedIntervalWindowsProcessing): + """Fixed interval windows processing. + + Paradigm for creating epochs at fixed interval, + ignoring the stim channel and events of the dataset. + + Parameters + ---------- + + fmin: float (default 7) + cutoff frequency (Hz) for the high pass filter + + fmax: float (default 45) + cutoff frequency (Hz) for the low pass filter + + baseline: None | tuple of length 2 + The time interval to consider as “baseline†when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + + channels: list of str | None (default None) + list of channel to select. If None, use all EEG channels available in + the dataset. + + resample: float | None (default None) + If not None, resample the eeg data with the sampling rate provided. + + length: float (default 5.0) + Length of the epochs in seconds. + + stride: float (default 10.0) + Stride between epochs in seconds. + + start_offset: float (default 0.0) + Start from the beginning of the raw recordings in seconds. + + stop_offset: float | None (default None) + Stop offset from beginning of raw recordings in seconds. + If None, set to be the end of the recording. + + marker: int (default -1) + Marker to use for the events created. 
+ """ + + def __init__( + self, + fmin=7, + fmax=45, + baseline=None, + channels=None, + resample=None, + length: float = 5.0, + stride: float = 10.0, + start_offset=0.0, + stop_offset=None, + marker=-1, + ): + super().__init__( + filters=[(fmin, fmax)], + baseline=baseline, + channels=channels, + resample=resample, + length=length, + stride=stride, + start_offset=start_offset, + stop_offset=stop_offset, + marker=marker, + ) + + +class FilterBankFixedIntervalWindowsProcessing(BaseFixedIntervalWindowsProcessing): + """Filter bank fixed interval windows processing. + + Paradigm for creating epochs at fixed interval + with multiple narrow bandpass filters, + ignoring the stim channel and events of the dataset. + + Parameters + ---------- + + filters: list of list (default ((8, 12), (12, 16), (16, 20), (20, 24), (24, 28), (28, 32))) + bank of bandpass filter to apply. + + baseline: None | tuple of length 2 + The time interval to consider as “baseline†when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + + channels: list of str | None (default None) + list of channel to select. If None, use all EEG channels available in + the dataset. + + resample: float | None (default None) + If not None, resample the eeg data with the sampling rate provided. + + length: float (default 5.0) + Length of the epochs in seconds. + + stride: float (default 10.0) + Stride between epochs in seconds. + + start_offset: float (default 0.0) + Start from the beginning of the raw recordings in seconds. + + stop_offset: float | None (default None) + Stop offset from beginning of raw recordings in seconds. + If None, set to be the end of the recording. + + marker: int (default -1) + Marker to use for the events created. 
+ """ + + def __init__( + self, + filters=((8, 12), (12, 16), (16, 20), (20, 24), (24, 28), (28, 32)), + baseline=None, + channels=None, + resample=None, + length: float = 5.0, + stride: float = 10.0, + start_offset=0.0, + stop_offset=None, + marker=-1, + ): + super().__init__( + filters=filters, + baseline=baseline, + channels=channels, + resample=resample, + length=length, + stride=stride, + start_offset=start_offset, + stop_offset=stop_offset, + marker=marker, + ) diff --git a/moabb/paradigms/motor_imagery.py b/moabb/paradigms/motor_imagery.py index f34c5697a..05c7d0273 100644 --- a/moabb/paradigms/motor_imagery.py +++ b/moabb/paradigms/motor_imagery.py @@ -63,19 +63,15 @@ def __init__( channels=None, resample=None, ): - super().__init__() - self.filters = filters - self.events = events - self.channels = channels - self.baseline = baseline - self.resample = resample - - if tmax is not None: - if tmin >= tmax: - raise (ValueError("tmax must be greater than tmin")) - - self.tmin = tmin - self.tmax = tmax + super().__init__( + filters=filters, + events=events, + channels=channels, + baseline=baseline, + resample=resample, + tmin=tmin, + tmax=tmax, + ) def is_valid(self, dataset): ret = True @@ -421,4 +417,4 @@ def datasets(self): return [FakeDataset(["left_hand", "right_hand"], paradigm="imagery")] def is_valid(self, dataset): - return True + return dataset.paradigm == "imagery" diff --git a/moabb/paradigms/p300.py b/moabb/paradigms/p300.py index 09650ad61..88f692fd3 100644 --- a/moabb/paradigms/p300.py +++ b/moabb/paradigms/p300.py @@ -1,14 +1,10 @@ """P300 Paradigms""" -import abc import logging -import mne -import numpy as np -import pandas as pd - from moabb.datasets import utils from moabb.datasets.fake import FakeDataset +from moabb.datasets.preprocessing import RawToEventsP300 from moabb.paradigms.base import BaseParadigm @@ -67,19 +63,15 @@ def __init__( channels=None, resample=None, ): - super().__init__() - self.filters = filters - self.events = events - self.channels = channels - self.baseline = baseline - self.resample = resample - - if tmax is not None: - if tmin >= tmax: - raise (ValueError("tmax must be greater than tmin")) - - self.tmin = tmin - self.tmax = tmax + super().__init__( + filters=filters, + events=events, + channels=channels, + baseline=baseline, + resample=resample, + tmin=tmin, + tmax=tmax, + ) def is_valid(self, dataset): ret = True @@ -94,167 +86,9 @@ def is_valid(self, dataset): # we should verify list of channels, somehow return ret - @abc.abstractmethod - def used_events(self, dataset): - pass - - def process_raw( # noqa: C901 - self, raw, dataset, return_epochs=False, return_raws=False - ): - """ - Process one raw data file. - - This function apply the preprocessing and eventual epoching on the - individual run, and return the data, labels and a dataframe with - metadata. - - metadata is a dataframe with as many row as the length of the data - and labels. - - Parameters - ---------- - raw: mne.Raw instance - the raw EEG data. - dataset : dataset instance - The dataset corresponding to the raw file. mainly use to access - dataset specific information. - return_epochs: boolean - This flag specifies whether to return only the data array or the - complete processed mne.Epochs - return_raws: boolean - To return raw files and events, to ensure compatibility with braindecode. 
- Mutually exclusive with return_epochs - - returns - ------- - X : Union[np.ndarray, mne.Epochs] - the data that will be used as features for the model - Note: if return_epochs=True, this is mne.Epochs - if return_epochs=False, this is np.ndarray - labels: np.ndarray - the labels for training / evaluating the model - metadata: pd.DataFrame - A dataframe containing the metadata - - """ - - if return_epochs and return_raws: - message = "Select only return_epochs or return_raws, not both" - raise ValueError(message) - - # get events id + def _get_events_pipeline(self, dataset): event_id = self.used_events(dataset) - - # find the events, first check stim_channels then annotations - stim_channels = mne.utils._get_stim_channel(None, raw.info, raise_error=False) - if len(stim_channels) > 0: - events = mne.find_events(raw, shortest_event=0, verbose=False) - else: - try: - events, _ = mne.events_from_annotations( - raw, event_id=event_id, verbose=False - ) - except ValueError: - log.warning(f"No matching annotations in {raw.filenames}") - return - - # picks channels - if self.channels is None: - picks = mne.pick_types(raw.info, eeg=True, stim=False) - else: - picks = mne.pick_channels( - raw.info["ch_names"], include=self.channels, ordered=True - ) - - # pick events, based on event_id - try: - if "Target" in event_id and "NonTarget" in event_id: - if ( - type(event_id["Target"]) is list - and type(event_id["NonTarget"]) == list - ): - event_id_new = dict(Target=1, NonTarget=0) - events = mne.merge_events(events, event_id["Target"], 1) - events = mne.merge_events(events, event_id["NonTarget"], 0) - event_id = event_id_new - events = mne.pick_events(events, include=list(event_id.values())) - except RuntimeError: - # skip raw if no event found - return - - if return_raws: - raw = raw.pick(picks) - else: - # get interval - tmin = self.tmin + dataset.interval[0] - if self.tmax is None: - tmax = dataset.interval[1] - else: - tmax = self.tmax + dataset.interval[0] - - X = [] - for bandpass in self.filters: - fmin, fmax = bandpass - # filter data - raw_f = raw.copy().filter( - fmin, fmax, method="iir", picks=picks, verbose=False - ) - # epoch data - baseline = self.baseline - if baseline is not None: - baseline = ( - self.baseline[0] + dataset.interval[0], - self.baseline[1] + dataset.interval[0], - ) - bmin = baseline[0] if baseline[0] < tmin else tmin - bmax = baseline[1] if baseline[1] > tmax else tmax - else: - bmin = tmin - bmax = tmax - epochs = mne.Epochs( - raw_f, - events, - event_id=event_id, - tmin=bmin, - tmax=bmax, - proj=False, - baseline=baseline, - preload=True, - verbose=False, - picks=picks, - event_repeated="drop", - on_missing="ignore", - ) - if bmin < tmin or bmax > tmax: - epochs.crop(tmin=tmin, tmax=tmax) - if self.resample is not None: - epochs = epochs.resample(self.resample) - # rescale to work with uV - if return_epochs: - X.append(epochs) - else: - X.append(dataset.unit_factor * epochs.get_data()) - - # overwrite events in case epochs have been dropped: - # (assuming all filters produce the same number of epochs...) 
- events = epochs.events - - inv_events = {k: v for v, k in event_id.items()} - labels = np.array([inv_events[e] for e in events[:, -1]]) - - if return_epochs: - X = mne.concatenate_epochs(X) - elif return_raws: - X = raw - elif len(self.filters) == 1: - # if only one band, return a 3D array - X = X[0] - else: - # otherwise return a 4D - X = np.array(X).transpose((1, 2, 3, 0)) - - metadata = pd.DataFrame(index=range(len(labels))) - return X, labels, metadata + return RawToEventsP300(event_id=event_id) @property def datasets(self): @@ -358,4 +192,4 @@ def datasets(self): return [FakeDataset(["Target", "NonTarget"], paradigm="p300")] def is_valid(self, dataset): - return True + return dataset.paradigm == "p300" diff --git a/moabb/paradigms/ssvep.py b/moabb/paradigms/ssvep.py index 0c345985e..fb5a5d378 100644 --- a/moabb/paradigms/ssvep.py +++ b/moabb/paradigms/ssvep.py @@ -63,19 +63,17 @@ def __init__( channels=None, resample=None, ): - super().__init__() - self.filters = filters - self.events = events - self.n_classes = n_classes - self.baseline = baseline - self.channels = channels - self.resample = resample - - if tmax is not None and tmin >= tmax: - raise (ValueError("tmax must be greater than tmin")) - self.tmin = tmin - self.tmax = tmax + super().__init__( + filters=filters, + events=events, + channels=channels, + baseline=baseline, + resample=resample, + tmin=tmin, + tmax=tmax, + ) + self.n_classes = n_classes if self.events is None: log.warning( "Choosing the first " @@ -266,4 +264,4 @@ def datasets(self): return [FakeDataset(event_list=["13", "15"], paradigm="ssvep")] def is_valid(self, dataset): - return True + return dataset.paradigm == "ssvep" diff --git a/moabb/tests/benchmark.py b/moabb/tests/benchmark.py index 1f2ff32fa..a5ef28b60 100644 --- a/moabb/tests/benchmark.py +++ b/moabb/tests/benchmark.py @@ -16,17 +16,27 @@ def tearDownClass(cls): rep_dir = Path.cwd() / Path("benchmark/") shutil.rmtree(rep_dir) - def test_benchmark(self): + def test_benchmark_strdataset(self): res = benchmark( pipelines=str(self.pp_dir), evaluations=["WithinSession"], - include_datasets=["FakeDataset"], + include_datasets=[ + "FakeDataset_imagery_10_2_2__left_hand_right_hand__C3_Cz_C4", + "FakeDataset_p300_10_2_2__Target_NonTarget__C3_Cz_C4", + "FakeDataset_ssvep_10_2_2__13_15__C3_Cz_C4", + ], ) self.assertEqual(len(res), 80) + + def test_benchmark_objdataset(self): res = benchmark( pipelines=str(self.pp_dir), evaluations=["WithinSession"], - include_datasets=[FakeDataset()], + include_datasets=[ + FakeDataset(["left_hand", "right_hand"], paradigm="imagery"), + FakeDataset(["Target", "NonTarget"], paradigm="p300"), + FakeDataset(["13", "15"], paradigm="ssvep"), + ], ) self.assertEqual(len(res), 80) diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py index 4143619ee..016a7cc04 100644 --- a/moabb/tests/datasets.py +++ b/moabb/tests/datasets.py @@ -1,3 +1,5 @@ +import shutil +import tempfile import unittest import mne @@ -57,11 +59,86 @@ def test_fake_dataset(self): self.assertEqual(len(data[1]["session_0"]), n_runs) # We should get a raw array at the end - self.assertEqual(type(data[1]["session_0"]["run_0"]), mne.io.RawArray) + self.assertIsInstance(data[1]["session_0"]["run_0"], mne.io.BaseRaw) # bad subject id must raise error self.assertRaises(ValueError, ds.get_data, [1000]) + def test_cache_dataset(self): + tempdir = tempfile.mkdtemp() + for paradigm in ["imagery", "p300", "ssvep"]: + dataset = FakeDataset(paradigm=paradigm) + # Save cache: + with self.assertLogs( + 
logger="moabb.datasets.bids_interface", level="INFO" + ) as cm: + _ = dataset.get_data( + subjects=[1], + cache_config=dict( + save_raw=True, + use=True, + overwrite_raw=False, + path=tempdir, + ), + ) + print("\n".join(cm.output)) + expected = [ + "Attempting to retrieve cache .* datatype-eeg", # empty pipeline + "No cache found at", + "Starting caching .* datatype-eeg", + "Finished caching .* datatype-eeg", + ] + self.assertEqual(len(expected), len(cm.output)) + for i, regex in enumerate(expected): + self.assertRegex(cm.output[i], regex) + + # Load cache: + with self.assertLogs( + logger="moabb.datasets.bids_interface", level="INFO" + ) as cm: + _ = dataset.get_data( + subjects=[1], + cache_config=dict( + save_raw=True, + use=True, + overwrite_raw=False, + path=tempdir, + ), + ) + print("\n".join(cm.output)) + expected = [ + "Attempting to retrieve cache .* datatype-eeg", + "Finished reading cache .* datatype-eeg", + ] + self.assertEqual(len(expected), len(cm.output)) + for i, regex in enumerate(expected): + self.assertRegex(cm.output[i], regex) + + # Overwrite cache: + with self.assertLogs( + logger="moabb.datasets.bids_interface", level="INFO" + ) as cm: + _ = dataset.get_data( + subjects=[1], + cache_config=dict( + save_raw=True, + use=True, + overwrite_raw=True, + path=tempdir, + ), + ) + print("\n".join(cm.output)) + expected = [ + "Starting erasing cache .* datatype-eeg", + "Finished erasing cache .* datatype-eeg", + "Starting caching .* datatype-eeg", + "Finished caching .* datatype-eeg", + ] + self.assertEqual(len(expected), len(cm.output)) + for i, regex in enumerate(expected): + self.assertRegex(cm.output[i], regex) + shutil.rmtree(tempdir) + def test_dataset_accept(self): """verify that accept licence is working""" # Only Shin2017 (bbci_eeg_fnirs) for now @@ -132,7 +209,7 @@ def test_fake_dataset(self): # Check data type self.assertTrue(isinstance(data, dict)) - self.assertEqual(type(data[1]["session_0"]["run_0"]), mne.io.RawArray) + self.assertIsInstance(data[1]["session_0"]["run_0"], mne.io.BaseRaw) # Check data size self.assertEqual(len(data), 1) diff --git a/moabb/tests/paradigms.py b/moabb/tests/paradigms.py index 1c128e231..0d31b8f54 100644 --- a/moabb/tests/paradigms.py +++ b/moabb/tests/paradigms.py @@ -1,5 +1,8 @@ import logging +import shutil +import tempfile import unittest +from math import ceil import numpy as np from mne import BaseEpochs @@ -12,9 +15,11 @@ BaseMotorImagery, BaseP300, BaseSSVEP, + FilterBankFixedIntervalWindowsProcessing, FilterBankLeftRightImagery, FilterBankMotorImagery, FilterBankSSVEP, + FixedIntervalWindowsProcessing, LeftRightImagery, RestingStateToP300Adapter, ) @@ -97,13 +102,23 @@ def test_baseImagery_wrongevent(self): # selected event. certain runs in dataset are event specific. 
paradigm = SimpleMotorImagery(filters=[[7, 12], [12, 24]]) dataset = FakeDataset(paradigm="imagery") - raw = dataset.get_data([1])[1]["session_0"]["run_0"] + epochs_pipeline = paradigm._get_epochs_pipeline( + return_epochs=True, return_raws=False, dataset=dataset + ) + # no stim channel after loading cache + raw = dataset.get_data([1], cache_config=dict(use=False, save_raw=False))[1][ + "session_0" + ]["run_0"] + raw.load_data() + self.assertEqual("stim", raw.ch_names[-1]) # add something on the event channel raw._data[-1] *= 10 - self.assertIsNone(paradigm.process_raw(raw, dataset)) + with self.assertRaises(ValueError, msg="No events found"): + epochs_pipeline.transform(raw) # zeros it out raw._data[-1] *= 0 - self.assertIsNone(paradigm.process_raw(raw, dataset)) + with self.assertRaises(ValueError, msg="No events found"): + epochs_pipeline.transform(raw) def test_BaseImagery_noevent(self): # Assert error if events from paradigm and dataset dont overlap @@ -138,6 +153,132 @@ def test_BaseImagery_epochsmetadata(self): # does not work with multiple filters: self.assertTrue(metadata.equals(epochs.metadata)) + def test_BaseImagery_cache(self): + tempdir = tempfile.mkdtemp() + dataset = FakeDataset(paradigm="imagery", n_sessions=1, n_runs=1) + paradigm = SimpleMotorImagery() + # We save the full cache (raws, epochs, arrays): + from moabb import set_log_level + + set_log_level("INFO") + with self.assertLogs(logger="moabb.datasets.bids_interface", level="INFO") as cm: + _ = paradigm.get_data( + dataset, + subjects=[1], + cache_config=dict( + use=True, + path=tempdir, + save_raw=True, + save_epochs=True, + save_array=True, + overwrite_raw=False, + overwrite_epochs=False, + overwrite_array=False, + ), + ) + print("\n".join(cm.output)) + expected = [ + "Attempting to retrieve cache .* datatype-array", + "No cache found at", + "Attempting to retrieve cache .* datatype-epo", + "No cache found at", + "Attempting to retrieve cache .* datatype-eeg", # raw_pipeline + "No cache found at", + "Attempting to retrieve cache .* datatype-eeg", # SetRawAnnotations pipeline + "No cache found at", + "Starting caching .* datatype-eeg", + "Finished caching .* datatype-eeg", + "Starting caching .* datatype-epo", + "Finished caching .* datatype-epo", + "Starting caching .* datatype-array", + "Finished caching .* datatype-array", + ] + self.assertEqual(len(expected), len(cm.output)) + for i, regex in enumerate(expected): + self.assertRegex(cm.output[i], regex) + + # Test loading the array cache: + with self.assertLogs(logger="moabb.datasets.bids_interface", level="INFO") as cm: + _ = paradigm.get_data( + dataset, + subjects=[1], + cache_config=dict( + use=True, + path=tempdir, + save_raw=False, + save_epochs=False, + save_array=False, + overwrite_raw=False, + overwrite_epochs=False, + overwrite_array=False, + ), + ) + print("\n".join(cm.output)) + expected = [ + "Attempting to retrieve cache .* datatype-array", + "Finished reading cache .* datatype-array", + ] + self.assertEqual(len(expected), len(cm.output)) + for i, regex in enumerate(expected): + self.assertRegex(cm.output[i], regex) + + # Test loading the epochs cache: + with self.assertLogs(logger="moabb.datasets.bids_interface", level="INFO") as cm: + _ = paradigm.get_data( + dataset, + subjects=[1], + cache_config=dict( + use=True, + path=tempdir, + save_raw=False, + save_epochs=False, + save_array=False, + overwrite_raw=False, + overwrite_epochs=False, + overwrite_array=True, + ), + ) + print("\n".join(cm.output)) + expected = [ + "Starting erasing cache .* 
datatype-array", + "Finished erasing cache .* datatype-array", + "Attempting to retrieve cache .* datatype-epo", + "Finished reading cache .* datatype-epo", + ] + self.assertEqual(len(expected), len(cm.output)) + for i, regex in enumerate(expected): + self.assertRegex(cm.output[i], regex) + + # Test loading the raw cache: + with self.assertLogs(logger="moabb.datasets.bids_interface", level="INFO") as cm: + _ = paradigm.get_data( + dataset, + subjects=[1], + cache_config=dict( + use=True, + path=tempdir, + save_raw=False, + save_epochs=False, + save_array=False, + overwrite_raw=False, + overwrite_epochs=True, + overwrite_array=False, + ), + ) + print("\n".join(cm.output)) + expected = [ + "Attempting to retrieve cache .* datatype-array", + "No cache found at", + "Starting erasing cache .* datatype-epo", + "Finished erasing cache .* datatype-epo", + "Attempting to retrieve cache .* datatype-eeg", + "Finished reading cache .* datatype-eeg", + ] + self.assertEqual(len(expected), len(cm.output)) + for i, regex in enumerate(expected): + self.assertRegex(cm.output[i], regex) + shutil.rmtree(tempdir) + def test_LeftRightImagery_paradigm(self): # with a good dataset paradigm = LeftRightImagery() @@ -273,13 +414,23 @@ def test_BaseP300_wrongevent(self): # selected event. certain runs in dataset are event specific. paradigm = SimpleP300(filters=[[1, 12], [12, 24]]) dataset = FakeDataset(paradigm="p300", event_list=["Target", "NonTarget"]) - raw = dataset.get_data([1])[1]["session_0"]["run_0"] + epochs_pipeline = paradigm._get_epochs_pipeline( + return_epochs=True, return_raws=False, dataset=dataset + ) + # no stim channel after loading cache + raw = dataset.get_data([1], cache_config=dict(use=False, save_raw=False))[1][ + "session_0" + ]["run_0"] + raw.load_data() + self.assertEqual("stim", raw.ch_names[-1]) # add something on the event channel raw._data[-1] *= 10 - self.assertIsNone(paradigm.process_raw(raw, dataset)) + with self.assertRaises(ValueError, msg="No events found"): + epochs_pipeline.transform(raw) # zeros it out raw._data[-1] *= 0 - self.assertIsNone(paradigm.process_raw(raw, dataset)) + with self.assertRaises(ValueError, msg="No events found"): + epochs_pipeline.transform(raw) def test_BaseP300_droppedevent(self): dataset = FakeDataset(paradigm="p300", event_list=["Target", "NonTarget"]) @@ -352,10 +503,18 @@ def test_RestingState_paradigm(self): # we should have two sessions in the metadata self.assertEqual(len(np.unique(metadata.session)), 2) # should return epochs - epochs, _, _ = paradigm.get_data(dataset, subjects=[1], return_epochs=True) + epochs, _, _ = paradigm.get_data( + dataset, + subjects=[1], + return_epochs=True, + ) self.assertIsInstance(epochs, BaseEpochs) # should return raws - raws, _, _ = paradigm.get_data(dataset, subjects=[1], return_raws=True) + raws, _, _ = paradigm.get_data( + dataset, + subjects=[1], + return_raws=True, + ) for raw in raws: self.assertIsInstance(raw, BaseRaw) # should raise error @@ -572,3 +731,55 @@ def test_FilterBankSSVEP_filters(self): # should return epochs epochs, _, _ = paradigm.get_data(dataset, subjects=[1], return_epochs=True) self.assertIsInstance(epochs, BaseEpochs) + + +class Test_FixedIntervalWindowsProcessing(unittest.TestCase): + def test_processing(self): + processings = [ + FixedIntervalWindowsProcessing(length=0.51, stride=0.27, resample=99), + FilterBankFixedIntervalWindowsProcessing( + length=0.51, stride=0.27, resample=99, filters=[[8, 35]] + ), + ] + for processing in processings: + for paradigm_name in ["ssvep", "p300", 
"imagery"]: + dataset = FakeDataset(paradigm=paradigm_name, n_sessions=1, n_runs=1) + X, labels, metadata = processing.get_data(dataset, subjects=[1]) + + # Verify that they have the same length + self.assertEqual(len(X), len(labels), len(metadata)) + # X must be a 3D array + self.assertEqual(len(X.shape), 3) + # labels must contain 3 values + self.assertTrue(all(label == "Window" for label in labels)) + # metadata must have subjets, sessions, runs + self.assertTrue("subject" in metadata.columns) + self.assertTrue("session" in metadata.columns) + self.assertTrue("run" in metadata.columns) + # Only one subject in the metadata + self.assertEqual(np.unique(metadata.subject), 1) + self.assertEqual(len(np.unique(metadata.session)), 1) + # should return epochs + epochs, _, _ = processing.get_data( + dataset, subjects=[1], return_epochs=True + ) + self.assertIsInstance(epochs, BaseEpochs) + # should return raws + raws, _, _ = processing.get_data(dataset, subjects=[1], return_raws=True) + for raw in raws: + self.assertIsInstance(raw, BaseRaw) + n_times = 60 * len(dataset.event_id) * 128 # 128 = dataset sfreq + n_epochs = ceil( + (n_times - int(processing.length * 128)) + / int(processing.stride * 128) + ) # no start/stop offset + self.assertEqual(n_epochs, len(epochs)) + # should raise error + self.assertRaises( + ValueError, + processing.get_data, + dataset, + subjects=[1], + return_epochs=True, + return_raws=True, + ) diff --git a/poetry.lock b/poetry.lock index ce9788b66..0d0b44dbc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. [[package]] name = "absl-py" @@ -486,6 +486,21 @@ files = [ {file = "docutils-0.18.1.tar.gz", hash = "sha256:679987caf361a7539d76e584cbeddc311e3aee937877c87346f31debc63e9d06"}, ] +[[package]] +name = "edflib-python" +version = "1.0.7" +description = "Library to read/write EDF+/BDF+ files written in pure Python by the same author as the original EDFlib." 
+category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "EDFlib-Python-1.0.7.tar.gz", hash = "sha256:81f671a69876d0ea8b62da3139615c39512d108e5097981c5e1b6e30b691acb8"}, + {file = "EDFlib_Python-1.0.7-py3-none-any.whl", hash = "sha256:9d81fbca705934b43fd568e20a01250aad475d591787fc0cc9f4bd68166f4cf9"}, +] + +[package.dependencies] +numpy = ">=1.17" + [[package]] name = "filelock" version = "3.12.0" @@ -1221,6 +1236,31 @@ tqdm = "*" hdf5 = ["h5io", "pymatreader"] test = ["EDFlib-Python", "black", "check-manifest", "codespell", "eeglabio", "imageio (>=2.6.1)", "imageio-ffmpeg (>=0.4.1)", "nbclient", "nitime", "numpydoc", "pre-commit", "pybv", "pytest", "pytest-cov", "pytest-harvest", "pytest-timeout", "ruff", "sphinx-gallery", "tomli", "twine", "wheel"] +[[package]] +name = "mne-bids" +version = "0.13.dev0" +description = "MNE-BIDS: Organizing MEG, EEG, and iEEG data according to the BIDS specification and facilitating their analysis with MNE-Python" +category = "main" +optional = false +python-versions = "~=3.8" +files = [] +develop = false + +[package.dependencies] +mne = ">=1.3" +numpy = ">=1.20.2" +scipy = ">=1.6.3" +setuptools = "*" + +[package.extras] +full = ["EDFlib-Python (>=1.0.6)", "matplotlib (>=3.4.0)", "nibabel (>=3.2.1)", "pandas (>=1.2.4)", "pybv (>=0.7.5)", "pymatreader (>=0.0.30)"] + +[package.source] +type = "git" +url = "https://github.com/mne-tools/mne-bids.git" +reference = "9bc6fe5" +resolved_reference = "9bc6fe53e3fb320d9917dde44e5e8d2f1a43d4a8" + [[package]] name = "nodeenv" version = "1.8.0" @@ -2033,7 +2073,7 @@ stats = ["scipy (>=1.3)", "statsmodels (>=0.10)"] name = "setuptools" version = "67.8.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2748,4 +2788,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = ">=3.8, <3.11" -content-hash = "65a3eae84d678bb8bbb2d79898e381af953b7cfc2f52b38d07841d0d6ce4c92b" +content-hash = "2cb1f9ba87f20785aa1ae76bec4187451dca5448dc26bffb33d644c7c574610b" diff --git a/pyproject.toml b/pyproject.toml index 82499576b..b26a0bf66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,8 @@ requests = "^2.28.1" tqdm = "^4.64.1" coverage = "^7.0.1" memory-profiler = "^0.61.0" +edflib-python = "^1.0.6" +mne-bids = { git = "https://github.com/mne-tools/mne-bids.git", rev = "9bc6fe5" } # until release is available [tool.poetry.group.carbonemission] optional = true @@ -56,7 +58,7 @@ pydata-sphinx-theme = "^0.13.2" numpydoc = "^1.5.0" pre-commit = "^2.21.0" m2r2 = "^0.3.0" -tdlda = {git = "https://github.com/jsosulski/tdlda.git", rev = "0.1.0"} +tdlda = { git = "https://github.com/jsosulski/tdlda.git", rev = "0.1.0" } sphinx-design = "^0.3.0" sphinx-rtd-theme = "^1.2.0" From 946372e70db3a9f88c5a2ef38121a58cc166bb92 Mon Sep 17 00:00:00 2001 From: Bru Date: Tue, 1 Aug 2023 13:12:43 +0200 Subject: [PATCH 28/64] Improve pre commit configuration, docs check (#440) * Updating the whats_new.rst and fixing the dataset list * Improving the pre-commit * Improving the pre-commit * changing permision file * changing other files permission * changing other files permission * Fixing the yaml files with yamllint * Removing the double quoted string fixer * Ignoring the E501 * Removing autopep8 * Fixing typos * install pip * Adding doc sytle * Removing the docs checkers. Not pratical. 
* [pre-commit.ci] auto fixes from pre-commit.com hooks --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 3 +- examples/plot_benchmark.py | 2 +- .../plot_vr_pc_p300_different_epoch_size.py | 7 ++- moabb/analysis/__init__.py | 1 - moabb/analysis/meta_analysis.py | 21 ++++--- moabb/analysis/plotting.py | 10 +-- moabb/analysis/results.py | 12 ++-- moabb/benchmark.py | 8 +-- moabb/datasets/Lee2019.py | 8 +-- moabb/datasets/Weibo2014.py | 2 +- moabb/datasets/Zhou2016.py | 6 +- moabb/datasets/__init__.py | 3 +- moabb/datasets/alex_mi.py | 7 +-- moabb/datasets/bbci_eeg_fnirs.py | 10 ++- moabb/datasets/bnci.py | 42 ++++++------- moabb/datasets/braininvaders.py | 32 +++++----- moabb/datasets/compound_dataset/base.py | 9 ++- moabb/datasets/download.py | 14 ++--- moabb/datasets/epfl.py | 2 +- moabb/datasets/fake.py | 4 +- moabb/datasets/gigadb.py | 6 +- moabb/datasets/huebner_llp.py | 14 ++--- moabb/datasets/mpi_mi.py | 7 +-- moabb/datasets/neiry.py | 8 ++- moabb/datasets/phmd_ml.py | 7 +-- moabb/datasets/physionet_mi.py | 6 +- moabb/datasets/schirrmeister2017.py | 3 +- moabb/datasets/sosulski2019.py | 3 +- moabb/datasets/ssvep_exo.py | 8 +-- moabb/datasets/ssvep_mamem.py | 19 +++--- moabb/datasets/ssvep_nakanishi.py | 8 +-- moabb/datasets/ssvep_wang.py | 8 +-- moabb/datasets/upper_limb.py | 3 +- moabb/datasets/utils.py | 13 ++-- moabb/evaluations/__init__.py | 6 +- moabb/evaluations/base.py | 2 - moabb/evaluations/utils.py | 21 +++---- moabb/paradigms/__init__.py | 12 ++-- moabb/paradigms/base.py | 27 ++++---- moabb/paradigms/motor_imagery.py | 17 ++---- moabb/paradigms/p300.py | 8 +-- moabb/paradigms/resting_state.py | 15 ++--- moabb/paradigms/ssvep.py | 56 +++++++++++------ moabb/pipelines/__init__.py | 8 +-- moabb/pipelines/classification.py | 27 ++++---- moabb/pipelines/csp.py | 10 +-- moabb/pipelines/deep_learning.py | 23 ++++--- moabb/pipelines/features.py | 61 +++++++++++-------- moabb/pipelines/utils.py | 20 +++--- moabb/pipelines/utils_deep_model.py | 7 +-- moabb/pipelines/utils_pytorch.py | 14 ++--- moabb/tests/datasets.py | 6 +- moabb/tests/download.py | 4 +- moabb/tests/evaluations.py | 15 ++--- moabb/tests/paradigms.py | 6 +- moabb/tests/util_braindecode.py | 27 ++++---- moabb/utils.py | 27 ++++---- scripts/generating_metainfo.py | 9 +-- ...orial_3_benchmarking_multiple_pipelines.py | 2 +- tutorials/tutorial_4_adding_a_dataset.py | 14 ++--- 60 files changed, 361 insertions(+), 399 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 97f655314..820756d13 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,6 @@ repos: language_version: python3.8 args: [--line-length=90, --target-version=py38] - - repo: https://github.com/asottile/blacken-docs rev: 1.15.0 hooks: @@ -68,7 +67,7 @@ repos: exclude: ^docs/ | ^setup\.py$ | - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.280 + rev: v0.0.281 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix, --ignore, E501] diff --git a/examples/plot_benchmark.py b/examples/plot_benchmark.py index 227a890e6..eea01867e 100644 --- a/examples/plot_benchmark.py +++ b/examples/plot_benchmark.py @@ -1,4 +1,4 @@ -""" +"""Examples of how to use MOABB to benchmark pipelines. 
======================= Benchmarking with MOABB ======================= diff --git a/examples/plot_vr_pc_p300_different_epoch_size.py b/examples/plot_vr_pc_p300_different_epoch_size.py index 5b2822e66..f70701420 100644 --- a/examples/plot_vr_pc_p300_different_epoch_size.py +++ b/examples/plot_vr_pc_p300_different_epoch_size.py @@ -1,7 +1,8 @@ -""" -============================= +""" Example of P300 classification with different epoch size. + +======================================= Changing epoch size in P300 VR dataset -============================= +======================================= This example shows how to extract the epochs from the P300-VR dataset of a given subject and then classify them using Riemannian Geometry framework for BCI. diff --git a/moabb/analysis/__init__.py b/moabb/analysis/__init__.py index 83daa40f2..900aa04a7 100644 --- a/moabb/analysis/__init__.py +++ b/moabb/analysis/__init__.py @@ -33,7 +33,6 @@ def analyze(results, out_path, name="analysis", plot=False): plot: whether to plot results Either path or results is necessary - """ # input checks # if not isinstance(out_path, str): diff --git a/moabb/analysis/meta_analysis.py b/moabb/analysis/meta_analysis.py index 574d58061..dcad49a67 100644 --- a/moabb/analysis/meta_analysis.py +++ b/moabb/analysis/meta_analysis.py @@ -1,3 +1,4 @@ +"""Meta-analysis functions for MOABB.""" import itertools import logging @@ -10,7 +11,7 @@ def collapse_session_scores(df): - """Prepare results dataframe for computing statistics + """Prepare results dataframe for computing statistics. Parameters ---------- @@ -31,7 +32,7 @@ def collapse_session_scores(df): def compute_pvals_wilcoxon(df, order=None): - """Compute Wilcoxon rank-sum test on aggregated results + """Compute Wilcoxon rank-sum test on aggregated results. Returns kxk matrix of p-values computed via the Wilcoxon rank-sum test, order defines the order of rows and columns @@ -72,7 +73,7 @@ def compute_pvals_wilcoxon(df, order=None): def _pairedttest_exhaustive(data): - """Exhaustive paired t-test for permutation tests + """Exhaustive paired t-test for permutation tests. Returns p-values for exhaustive ttest that runs through all possible permutations of the first dimension. Very bad idea for size greater than 12 @@ -104,7 +105,7 @@ def _pairedttest_exhaustive(data): def _pairedttest_random(data, nperms): - """Returns p-values based on nperms permutations of a paired ttest + """Return p-values based on nperms permutations of a paired ttest. data is a (subj, alg, alg) matrix of differences between scores for each pair of algorithms per subject @@ -133,7 +134,7 @@ def _pairedttest_random(data, nperms): def compute_pvals_perm(df, order=None): - """Compute permutation test on aggregated results + """Compute permutation test on aggregated results. Returns kxk matrix of p-values computed via permutation test, order defines the order of rows and columns @@ -172,7 +173,7 @@ def compute_pvals_perm(df, order=None): def compute_effect(df, order=None): - """Compute effect size across datasets + """Compute effect size across datasets. Returns kxk matrix of effect sizes, order defines the order of rows/columns @@ -207,7 +208,7 @@ def compute_effect(df, order=None): def compute_dataset_statistics(df, perm_cutoff=20): - """Compute meta-analysis statistics from results dataframe + """Compute meta-analysis statistics from results dataframe. 
Parameters ---------- @@ -245,7 +246,7 @@ def compute_dataset_statistics(df, perm_cutoff=20): def combine_effects(effects, nsubs): - """Combining effects for meta-analysis statistics + """Combine effects for meta-analysis statistics. Function that takes effects from each experiments and number of subjects to return meta-analysis effect @@ -268,7 +269,7 @@ def combine_effects(effects, nsubs): def combine_pvalues(p, nsubs): - """Combining p-values for meta-analysis statistics + """Combine p-values for meta-analysis statistics. Function that takes pvals from each experiments and number of subjects to return meta-analysis significance using Stouffer's method @@ -294,7 +295,7 @@ def combine_pvalues(p, nsubs): def find_significant_differences(df, perm_cutoff=20): - """Compute differences between pipelines across datasets + """Compute differences between pipelines across datasets. Compute matrix of p-values for all algorithms over all datasets via combined p-values method diff --git a/moabb/analysis/plotting.py b/moabb/analysis/plotting.py index d112cbcae..ff7fcf1d9 100644 --- a/moabb/analysis/plotting.py +++ b/moabb/analysis/plotting.py @@ -28,7 +28,7 @@ def _simplify_names(x): def score_plot(data, pipelines=None): - """Plot scores for all pipelines and all datasets + """Plot scores for all pipelines and all datasets. Parameters ---------- @@ -77,7 +77,7 @@ def score_plot(data, pipelines=None): def codecarbon_plot(data, order_list=None, pipelines=None, country=""): - """Plot code carbon consume for the results from the benchmark + """Plot code carbon consume for the results from the benchmark. Parameters ---------- @@ -128,7 +128,7 @@ def codecarbon_plot(data, order_list=None, pipelines=None, country=""): def paired_plot(data, alg1, alg2): - """Generate a figure with a paired plot + """Generate a figure with a paired plot. Parameters ---------- @@ -160,7 +160,7 @@ def paired_plot(data, alg1, alg2): def summary_plot(sig_df, effect_df, p_threshold=0.05, simplify=True): - """Significance matrix to compare pipelines + """Significance matrix to compare pipelines. Visualize significances as a heatmap with green/grey/red for significantly higher/significantly lower. @@ -220,7 +220,7 @@ def summary_plot(sig_df, effect_df, p_threshold=0.05, simplify=True): def meta_analysis_plot(stats_df, alg1, alg2): # noqa: C901 - """Meta-analysis to compare two algorithms across several datasets + """Meta-analysis to compare two algorithms across several datasets. A meta-analysis style plot that shows the standardized effect with confidence intervals over all datasets for two algorithms. 
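The analysis helpers whose docstrings are cleaned up above are normally chained together after an evaluation run. A minimal sketch of that flow, assuming `results` is the scores dataframe returned by an evaluation; the function names and signatures are the ones visible in the hunks above, while the unpacking of `find_significant_differences` into two dataframes mirrors the `summary_plot(sig_df, effect_df)` signature and is otherwise an assumption:

import matplotlib.pyplot as plt

from moabb.analysis.meta_analysis import (
    compute_dataset_statistics,
    find_significant_differences,
)
from moabb.analysis.plotting import score_plot, summary_plot

# `results`: dataframe of scores, one row per subject/session/dataset/pipeline,
# e.g. as returned by an evaluation's process(pipelines) call.
score_plot(results)  # raw scores per dataset and pipeline
stats = compute_dataset_statistics(results)  # Wilcoxon / permutation p-values per dataset
sig_df, effect_df = find_significant_differences(stats)  # combined p-values and effect sizes
summary_plot(sig_df, effect_df)  # significance matrix across pipelines
plt.show()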
diff --git a/moabb/analysis/results.py b/moabb/analysis/results.py index 0360dc5a1..ea5078062 100644 --- a/moabb/analysis/results.py +++ b/moabb/analysis/results.py @@ -56,7 +56,6 @@ class Results: Saves dataframe per pipeline and can query to see if particular subject has already been run - """ def __init__( @@ -68,9 +67,7 @@ def __init__( hdf5_path=None, additional_columns=None, ): - """ - class that will abstract result storage - """ + """Class that will abstract result storage.""" from moabb.evaluations.base import BaseEvaluation from moabb.paradigms.base import BaseParadigm @@ -112,7 +109,7 @@ class that will abstract result storage ) def add(self, results, pipelines): # noqa: C901 - """add results""" + """Add results.""" def to_list(res): if type(res) is dict: @@ -231,9 +228,8 @@ def not_yet_computed(self, pipelines, dataset, subj): return ret def _already_computed(self, pipeline, dataset, subject, session=None): - """Check if we have results for a current combination of pipeline - / dataset / subject. - """ + """Check if we have results for a current combination of pipeline / + dataset / subject.""" with h5py.File(self.filepath, "r") as f: # get the digest from repr digest = get_digest(pipeline) diff --git a/moabb/benchmark.py b/moabb/benchmark.py index 89fca3657..1bb1098c4 100644 --- a/moabb/benchmark.py +++ b/moabb/benchmark.py @@ -45,7 +45,7 @@ def benchmark( # noqa: C901 include_datasets=None, exclude_datasets=None, ): - """Run benchmarks for selected pipelines and datasets + """Run benchmarks for selected pipelines and datasets. Load from saved pipeline configurations to determine associated paradigms. It is possible to include or exclude specific datasets and to choose the type of @@ -219,7 +219,7 @@ def benchmark( # noqa: C901 def _display_results(results): - """Print results after computation""" + """Print results after computation.""" tab = [] for d in results["dataset"].unique(): for p in results["pipeline"].unique(): @@ -246,7 +246,7 @@ def _display_results(results): def _combine_paradigms(prdgm_results): - """Combining FilterBank and direct paradigms + """Combining FilterBank and direct paradigms. Applied only on SSVEP for now. @@ -272,7 +272,7 @@ def _combine_paradigms(prdgm_results): def _save_results(eval_results, output, plot): - """Save results in specified folder + """Save results in specified folder. 
Parameters ---------- diff --git a/moabb/datasets/Lee2019.py b/moabb/datasets/Lee2019.py index 25c3a469c..040416bae 100644 --- a/moabb/datasets/Lee2019.py +++ b/moabb/datasets/Lee2019.py @@ -1,6 +1,4 @@ -""" -BMI/OpenBMI dataset -""" +"""BMI/OpenBMI dataset.""" from functools import partialmethod import numpy as np @@ -17,7 +15,7 @@ class Lee2019(BaseDataset): - """Base dataset class for Lee2019""" + """Base dataset class for Lee2019.""" def __init__( self, @@ -164,7 +162,7 @@ def _get_single_rest_run(self, data, prefix): return raw def _get_single_subject_data(self, subject): - """return data for a single subejct""" + """Return data for a single subejct.""" sessions = {} file_path_list = self.data_path(subject) diff --git a/moabb/datasets/Weibo2014.py b/moabb/datasets/Weibo2014.py index 4f7eda7b7..a0efe7c12 100644 --- a/moabb/datasets/Weibo2014.py +++ b/moabb/datasets/Weibo2014.py @@ -128,7 +128,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" fname = self.data_path(subject) # TODO: add 1s 0 buffer between trials and make continuous data = loadmat( diff --git a/moabb/datasets/Zhou2016.py b/moabb/datasets/Zhou2016.py index a6c3d558e..b4eb1e841 100644 --- a/moabb/datasets/Zhou2016.py +++ b/moabb/datasets/Zhou2016.py @@ -1,5 +1,5 @@ -""" -Simple and compound motor imagery. +"""Simple and compound motor imagery. + https://doi.org/10.1371/journal.pone.0114853 """ @@ -99,7 +99,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" files = self.data_path(subject) out = {} diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index 52de2b897..e85bb27e9 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -1,5 +1,4 @@ -""" -A dataset handle and abstract low level access to the data. the dataset will +"""A dataset handle and abstract low level access to the data. the dataset will takes data stored locally, in the format in which they have been downloaded, and will convert them into a MNE raw object. There are options to pool all the different recording sessions per subject or to evaluate them separately. diff --git a/moabb/datasets/alex_mi.py b/moabb/datasets/alex_mi.py index ec507c82e..8b874a865 100644 --- a/moabb/datasets/alex_mi.py +++ b/moabb/datasets/alex_mi.py @@ -1,6 +1,4 @@ -""" -Alex Motor imagery dataset. -""" +"""Alex Motor imagery dataset.""" from mne.io import Raw @@ -44,7 +42,6 @@ class AlexMI(BaseDataset): interface cerveau machine EEG asynchrone (Doctoral dissertation, Université de Grenoble). https://tel.archives-ouvertes.fr/tel-01196752 - """ def __init__(self): @@ -58,7 +55,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" raw = Raw(self.data_path(subject), preload=True, verbose="ERROR") return {"session_0": {"run_0": raw}} diff --git a/moabb/datasets/bbci_eeg_fnirs.py b/moabb/datasets/bbci_eeg_fnirs.py index f4e780bf2..797087435 100644 --- a/moabb/datasets/bbci_eeg_fnirs.py +++ b/moabb/datasets/bbci_eeg_fnirs.py @@ -1,6 +1,4 @@ -""" -BBCI EEG fNIRS Motor imagery dataset. 
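Editorial note: the datasets module docstring above describes datasets as handles over locally stored files converted to MNE Raw objects, keyed by subject, session and run (AlexMI, for instance, returns {"session_0": {"run_0": raw}}). A short access sketch, assuming those default key names; the first call downloads the data:

from moabb.datasets import AlexMI

dataset = AlexMI()
data = dataset.get_data(subjects=[1])  # downloads the files on first use
raw = data[1]["session_0"]["run_0"]  # subject -> session -> run -> mne.io.Raw
print(raw.info["sfreq"], len(raw.ch_names))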
-""" +"""BBCI EEG fNIRS Motor imagery dataset.""" import os import os.path as op @@ -121,7 +119,7 @@ def __init__( self.fnirs = fnirs # TODO: actually incorporate fNIRS somehow def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" fname, fname_mrk = self.data_path(subject) data = loadmat(fname, squeeze_me=True, struct_as_record=False)["cnt"] mrk = loadmat(fname_mrk, squeeze_me=True, struct_as_record=False)["mrk"] @@ -183,7 +181,7 @@ def data_path( class Shin2017A(Shin2017): - """Motor Imagey Dataset from Shin et al 2017 + """Motor Imagey Dataset from Shin et al 2017. .. admonition:: Dataset summary @@ -303,7 +301,7 @@ def __init__(self, accept=False): class Shin2017B(Shin2017): - """Mental Arithmetic Dataset from Shin et al 2017 + """Mental Arithmetic Dataset from Shin et al 2017. .. admonition:: Dataset summary diff --git a/moabb/datasets/bnci.py b/moabb/datasets/bnci.py index bdb26587c..539213bd6 100644 --- a/moabb/datasets/bnci.py +++ b/moabb/datasets/bnci.py @@ -1,6 +1,4 @@ -""" -BNCI 2014-001 Motor imagery dataset. -""" +"""BNCI 2014-001 Motor imagery dataset.""" import numpy as np from mne import create_info @@ -546,10 +544,21 @@ def _load_data_013_2015( def _convert_mi(filename, ch_names, ch_types): - """ - Processes (Graz) motor imagery data from MAT files, returns list of - recording runs. - """ + """Process (Graz) motor imagery data from MAT files. + + Parameters + ---------- + filename : str + Path to the MAT file. + ch_names : list of str + List of channel names. + ch_types : list of str + List of channel types. + + Returns + ------- + raw : instance of RawArray + returns list of recording runs.""" runs = [] event_id = {} data = loadmat(filename, struct_as_record=False, squeeze_me=True) @@ -717,10 +726,10 @@ def _convert_run_epfl(run, verbose=None): class MNEBNCI(BaseDataset): - """Base BNCI dataset""" + """Base BNCI dataset.""" def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" sessions = load_data(subject=subject, dataset=self.code, verbose=False) return sessions @@ -783,7 +792,6 @@ class BNCI2014001(MNEBNCI): References ---------- - .. [1] Tangermann, M., Müller, K.R., Aertsen, A., Birbaumer, N., Braun, C., Brunner, C., Leeb, R., Mehring, C., Miller, K.J., Mueller-Putz, G. and Nolte, G., 2012. Review of the BCI competition IV. @@ -845,12 +853,10 @@ class for validation. Participants had the task of performing sustained (5 References ----------- - .. [1] Steyrl, D., Scherer, R., Faller, J. and Müller-Putz, G.R., 2016. Random forests in non-invasive sensorimotor rhythm brain-computer interfaces: a practical and convenient non-linear classifier. Biomedical Engineering/Biomedizinische Technique, 61(1), pp.77-86. - """ def __init__(self): @@ -933,7 +939,6 @@ class BNCI2014004(MNEBNCI): G. Pfurtscheller. Brain-computer communication: motivation, aim, and impact of exploring a virtual apartment. IEEE Transactions on Neural Systems and Rehabilitation Engineering 15, 473–482, 2007 - """ def __init__(self): @@ -949,7 +954,7 @@ def __init__(self): class BNCI2014008(MNEBNCI): - """BNCI 2014-008 P300 dataset + """BNCI 2014-008 P300 dataset. .. admonition:: Dataset summary @@ -1004,7 +1009,6 @@ class BNCI2014008(MNEBNCI): toward a mental prosthesis utilizing eventrelated brain potentials, Electroencephalogr. Clin. Neurophysiol., vol. 70, n. 6, pagg. 510–523, 1988. 
- """ def __init__(self): @@ -1062,12 +1066,10 @@ class BNCI2014009(MNEBNCI): References ---------- - .. [1] P Aricò, F Aloise, F Schettini, S Salinari, D Mattia and F Cincotti (2013). Influence of P300 latency jitter on event related potential- based brain–computer interface performance. Journal of Neural Engineering, vol. 11, number 3. - """ def __init__(self): @@ -1118,12 +1120,10 @@ class BNCI2015001(MNEBNCI): References ---------- - .. [1] J. Faller, C. Vidaurre, T. Solis-Escalante, C. Neuper and R. Scherer (2012). Autocalibration and recurrent adaptation: Towards a plug and play online ERD- BCI. IEEE Transactions on Neural Systems and Rehabilitation Engineering, 20(3), 313-319. - """ def __init__(self): @@ -1163,12 +1163,10 @@ class BNCI2015003(MNEBNCI): References ---------- - .. [1] C. Guger, S. Daban, E. Sellers, C. Holzner, G. Krausz, R. Carabalona, F. Gramatica, and G. Edlinger (2009). How many people are able to control a P300-based brain-computer interface (BCI)?. Neuroscience Letters, vol. 462, pp. 94–98. - """ def __init__(self): @@ -1238,12 +1236,10 @@ class BNCI2015004(MNEBNCI): References ---------- - .. [1] Scherer R, Faller J, Friedrich EVC, Opisso E, Costa U, Kübler A, et al. (2015) Individually Adapted Imagery Improves Brain-Computer Interface Performance in End-Users with Disability. PLoS ONE 10(5). https://doi.org/10.1371/journal.pone.0123727 - """ def __init__(self): diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py index 5fca06a80..9cc2872a7 100644 --- a/moabb/datasets/braininvaders.py +++ b/moabb/datasets/braininvaders.py @@ -403,7 +403,7 @@ def _bi_data_path( # noqa: C901 class bi2012(BaseDataset): - """P300 dataset bi2012 from a "Brain Invaders" experiment + """P300 dataset bi2012 from a "Brain Invaders" experiment. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== @@ -459,7 +459,7 @@ def __init__(self, Training=True, Online=False): self.online = Online def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" return _bi_get_subject_data(self, subject) def data_path( @@ -469,7 +469,7 @@ def data_path( class bi2013a(BaseDataset): - """P300 dataset bi2013a from a "Brain Invaders" experiment + """P300 dataset bi2013a from a "Brain Invaders" experiment. .. admonition:: Dataset summary @@ -561,7 +561,7 @@ def __init__(self, NonAdaptive=True, Adaptive=False, Training=True, Online=False self.online = Online def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" return _bi_get_subject_data(self, subject) def data_path( @@ -571,7 +571,7 @@ def data_path( class bi2014a(BaseDataset): - """P300 dataset bi2014a from a "Brain Invaders" experiment + """P300 dataset bi2014a from a "Brain Invaders" experiment. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== @@ -619,7 +619,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" return _bi_get_subject_data(self, subject) def data_path( @@ -629,7 +629,7 @@ def data_path( class bi2014b(BaseDataset): - """P300 dataset bi2014b from a "Brain Invaders" experiment + """P300 dataset bi2014b from a "Brain Invaders" experiment. .. 
admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== @@ -678,7 +678,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" return _bi_get_subject_data(self, subject) def data_path( @@ -688,7 +688,7 @@ def data_path( class bi2015a(BaseDataset): - """P300 dataset bi2015a from a "Brain Invaders" experiment + """P300 dataset bi2015a from a "Brain Invaders" experiment. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== @@ -738,7 +738,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" return _bi_get_subject_data(self, subject) def data_path( @@ -748,7 +748,7 @@ def data_path( class bi2015b(BaseDataset): - """P300 dataset bi2015b from a "Brain Invaders" experiment + """P300 dataset bi2015b from a "Brain Invaders" experiment. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== @@ -801,7 +801,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" return _bi_get_subject_data(self, subject) def data_path( @@ -811,7 +811,7 @@ def data_path( class VirtualReality(BaseDataset): - """Dataset of an EEG-based BCI experiment in Virtual Reality using P300 + """Dataset of an EEG-based BCI experiment in Virtual Reality using P300. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== @@ -877,7 +877,7 @@ def __init__(self, virtual_reality=False, screen_display=True): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" return _bi_get_subject_data(self, subject) def data_path( @@ -886,8 +886,8 @@ def data_path( return _bi_data_path(self, subject, path, force_update, update_path, verbose) def get_block_repetition(self, paradigm, subjects, block_list, repetition_list): - """Select data for all provided subjects, blocks and repetitions. - Each subject has 12 blocks of 5 repetitions. + """Select data for all provided subjects, blocks and repetitions. Each + subject has 12 blocks of 5 repetitions. The returned data is a dictionary with the following structure:: diff --git a/moabb/datasets/compound_dataset/base.py b/moabb/datasets/compound_dataset/base.py index 77ebde66a..f86ba9e3e 100644 --- a/moabb/datasets/compound_dataset/base.py +++ b/moabb/datasets/compound_dataset/base.py @@ -1,12 +1,11 @@ -""" -Build a custom dataset using subjects from other datasets. -""" +"""Build a custom dataset using subjects from other datasets.""" from ..base import BaseDataset class CompoundDataset(BaseDataset): - """With this dataset, you can merge different dataset + """CompoundDataset class. + With this dataset, you can merge different dataset by selecting among subjects in all datasets to build a custom dataset. 
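Editorial note: to make the CompoundDataset idea above concrete, a hedged sketch of merging hand-picked subjects. Each subjects_list entry is a (dataset, subject, sessions, runs) tuple, as unpacked by _get_single_subject_data, with None meaning "take everything"; the constructor keywords (code, interval) and the import path are assumptions to check against moabb/datasets/compound_dataset:

from moabb.datasets import BNCI2014001, Zhou2016
from moabb.datasets.compound_dataset import CompoundDataset  # assumed import path

subjects_list = [
    (Zhou2016(), 1, None, None),  # subject 1, all sessions and runs
    (BNCI2014001(), 3, None, None),  # subject 3 of another MI dataset
]
merged = CompoundDataset(subjects_list=subjects_list, code="CustomMI", interval=[0, 4])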
@@ -83,7 +82,7 @@ def _set_subjects_list(self, subjects_list: list): self.subjects_list.extend(compoundDataset.subjects_list) def _get_single_subject_data(self, shopped_subject): - """return data for a single subject""" + """Return data for a single subject.""" dataset, subject, sessions, runs = self.subjects_list[shopped_subject - 1] subject_data = dataset._get_single_subject_data(subject) if sessions is None: diff --git a/moabb/datasets/download.py b/moabb/datasets/download.py index eb59a4b5a..f1b166b3f 100644 --- a/moabb/datasets/download.py +++ b/moabb/datasets/download.py @@ -17,8 +17,7 @@ def get_dataset_path(sign, path): - """Returns the dataset path allowing for changes in MNE_DATA - config + """Returns the dataset path allowing for changes in MNE_DATA config. Parameters ---------- @@ -88,7 +87,6 @@ def data_path(url, sign, path=None, force_update=False, update_path=True, verbos path : list of str Local path to the given data file. This path is contained inside a list of length one, for compatibility. - """ # noqa: E501 path = get_dataset_path(sign, path) key_dest = "MNE-{:s}-data".format(sign.lower()) @@ -105,7 +103,7 @@ def data_path(url, sign, path=None, force_update=False, update_path=True, verbos @verbose def data_dl(url, sign, path=None, force_update=False, verbose=None): - """Download file from url to specified path + """Download file from url to specified path. This function should replace data_path as the MNE will not support the download of dataset anymore. This version is using Pooch. @@ -164,7 +162,7 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None): # This function is from https://github.com/cognoma/figshare (BSD-3-Clause) def fs_issue_request(method, url, headers, data=None, binary=False): - """Wrapper for HTTP request + """Wrapper for HTTP request. Parameters ---------- @@ -232,7 +230,7 @@ def fs_get_file_list(article_id, version=None): def fs_get_file_hash(filelist): - """Returns a dict associating figshare file id to MD5 hash + """Returns a dict associating figshare file id to MD5 hash. Parameters ---------- @@ -248,7 +246,7 @@ def fs_get_file_hash(filelist): def fs_get_file_id(filelist): - """Returns a dict associating filename to figshare file id + """Returns a dict associating filename to figshare file id. Parameters ---------- @@ -264,7 +262,7 @@ def fs_get_file_id(filelist): def fs_get_file_name(filelist): - """Returns a dict associating figshare file id to filename + """Returns a dict associating figshare file id to filename. Parameters ---------- diff --git a/moabb/datasets/epfl.py b/moabb/datasets/epfl.py index 3685b67ef..d4eeba8cd 100644 --- a/moabb/datasets/epfl.py +++ b/moabb/datasets/epfl.py @@ -150,7 +150,7 @@ def _get_single_run_data(self, file_path): return raw def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" file_path_list = self.data_path(subject) sessions = {} diff --git a/moabb/datasets/fake.py b/moabb/datasets/fake.py index 95923ca02..dca29e657 100644 --- a/moabb/datasets/fake.py +++ b/moabb/datasets/fake.py @@ -127,8 +127,8 @@ def _get_single_subject_data(self, subject): return data def get_block_repetition(self, paradigm, subjects, block_list, repetition_list): - """Select data for all provided subjects, blocks and repetitions. - Each subject has 5 blocks of 12 repetitions. + """Select data for all provided subjects, blocks and repetitions. Each + subject has 5 blocks of 12 repetitions. 
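Editorial note: the download helpers above expose their signatures in the hunks themselves (get_dataset_path(sign, path), data_dl(url, sign, path=None, force_update=False, verbose=None)). A sketch of the intended flow; the URL and signature below are placeholders, not real MOABB resources:

from moabb.datasets.download import data_dl, get_dataset_path

root = get_dataset_path("EXAMPLE", path=None)  # resolve the MNE_DATA storage folder
print(root)
local_file = data_dl(
    url="https://example.org/subject_01.mat",  # placeholder URL, illustration only
    sign="EXAMPLE",  # placeholder dataset signature
    force_update=False,
)
print(local_file)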
The returned data is a dictionary with the following structure:: diff --git a/moabb/datasets/gigadb.py b/moabb/datasets/gigadb.py index cccaa015e..c4ad4be1b 100644 --- a/moabb/datasets/gigadb.py +++ b/moabb/datasets/gigadb.py @@ -1,6 +1,4 @@ -""" -GigaDb Motor imagery dataset. -""" +"""GigaDb Motor imagery dataset.""" import logging @@ -78,7 +76,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" fname = self.data_path(subject) data = loadmat( diff --git a/moabb/datasets/huebner_llp.py b/moabb/datasets/huebner_llp.py index ad7e36119..d23d1b327 100644 --- a/moabb/datasets/huebner_llp.py +++ b/moabb/datasets/huebner_llp.py @@ -113,8 +113,8 @@ def _extract_data(data_dir_extracted_path, data_archive_path): class Huebner2017(_BaseVisualMatrixSpellerDataset): - """ - Learning from label proportions for a visual matrix speller (ERP) dataset from Hübner et al 2017 [1]_. + """Learning from label proportions for a visual matrix speller (ERP) + dataset from Hübner et al 2017 [1]_. .. admonition:: Dataset summary @@ -180,8 +180,8 @@ def __init__(self, interval=None, raw_slice_offset=None, use_blocks_as_sessions= class Huebner2018(_BaseVisualMatrixSpellerDataset): - """ - Mixture of LLP and EM for a visual matrix speller (ERP) dataset from Hübner et al 2018 [1]_. + """Mixture of LLP and EM for a visual matrix speller (ERP) dataset from + Hübner et al 2018 [1]_. .. admonition:: Dataset summary @@ -239,9 +239,9 @@ def __init__(self, interval=None, raw_slice_offset=None, use_blocks_as_sessions= def _read_raw_llp_study_data(vhdr_fname, raw_slice_offset, verbose=None): - """ - Read LLP BVR recordings file. Ignore the different sequence lengths. Just tag event as target or non-target if it - contains a target or does not contain a target. + """Read LLP BVR recordings file. Ignore the different sequence lengths. + Just tag event as target or non-target if it contains a target or does not + contain a target. Parameters ---------- diff --git a/moabb/datasets/mpi_mi.py b/moabb/datasets/mpi_mi.py index 15439622c..bf0de31d4 100644 --- a/moabb/datasets/mpi_mi.py +++ b/moabb/datasets/mpi_mi.py @@ -1,6 +1,4 @@ -""" -Munich MI dataset -""" +"""Munich MI dataset.""" import mne import numpy as np @@ -62,7 +60,6 @@ class MunichMI(BaseDataset): .. [1] Grosse-Wentrup, Moritz, et al. "Beamforming in noninvasive brain–computer interfaces." IEEE Transactions on Biomedical Engineering 56.4 (2009): 1209-1219. - """ def __init__(self): @@ -77,7 +74,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" raw = mne.io.read_raw_eeglab( self.data_path(subject), preload=True, verbose="ERROR" ) diff --git a/moabb/datasets/neiry.py b/moabb/datasets/neiry.py index ed17eacf2..43b79462e 100644 --- a/moabb/datasets/neiry.py +++ b/moabb/datasets/neiry.py @@ -12,7 +12,8 @@ class DemonsP300(BaseDataset): - """Visual P300 dataset recorded in Virtual Reality (VR) game Raccoons versus Demons. + """Visual P300 dataset recorded in Virtual Reality (VR) game Raccoons + versus Demons. .. admonition:: Dataset summary @@ -118,7 +119,8 @@ def __init__(self): @staticmethod def _strip(session) -> tuple: - """Strips nans (from right side of all channels) added during hdf5 packaging + """Strips nans (from right side of all channels) added during hdf5 + packaging. 
Returns: tuple ready to be converted to `_session_dtype` @@ -131,7 +133,7 @@ def _strip(session) -> tuple: @classmethod def read_hdf(cls, filename) -> np.ndarray: - """Reads data from HDF file + """Reads data from HDF file. Returns: array of `_act_dtype` diff --git a/moabb/datasets/phmd_ml.py b/moabb/datasets/phmd_ml.py index b7dc6a4f2..fb369680b 100644 --- a/moabb/datasets/phmd_ml.py +++ b/moabb/datasets/phmd_ml.py @@ -12,8 +12,7 @@ class HeadMountedDisplay(BaseDataset): - """ - Passive Head Mounted Display with Music Listening dataset. + """Passive Head Mounted Display with Music Listening dataset. .. admonition:: Dataset summary @@ -58,8 +57,6 @@ class HeadMountedDisplay(BaseDataset): .. [1] G. Cattan, P. L. Coelho Rodrigues, and M. Congedo, ‘Passive Head-Mounted Display Music-Listening EEG dataset’, Gipsa-Lab ; IHMTEK, Research Report 2, Mar. 2019. doi: 10.5281/zenodo.2617084. - - """ def __init__(self): @@ -94,7 +91,7 @@ def __init__(self): self._chtypes = ["eeg"] * 16 + ["stim"] def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" filepath = self.data_path(subject)[0] data = loadmat(os.path.join(filepath, os.listdir(filepath)[0])) diff --git a/moabb/datasets/physionet_mi.py b/moabb/datasets/physionet_mi.py index 8afe12c9f..9e156cbbc 100644 --- a/moabb/datasets/physionet_mi.py +++ b/moabb/datasets/physionet_mi.py @@ -1,6 +1,4 @@ -""" -Physionet Motor imagery dataset. -""" +"""Physionet Motor imagery dataset.""" import mne import numpy as np @@ -120,7 +118,7 @@ def _load_one_run(self, subject, run, preload=True): return raw def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" data = {} sign = "EEGBCI" get_dataset_path(sign, None) diff --git a/moabb/datasets/schirrmeister2017.py b/moabb/datasets/schirrmeister2017.py index 69e4cae61..a842f2387 100644 --- a/moabb/datasets/schirrmeister2017.py +++ b/moabb/datasets/schirrmeister2017.py @@ -15,7 +15,7 @@ class Schirrmeister2017(BaseDataset): - """High-gamma dataset described in Schirrmeister et al. 2017 + """High-gamma dataset described in Schirrmeister et al. 2017. .. admonition:: Dataset summary @@ -61,7 +61,6 @@ class Schirrmeister2017(BaseDataset): .. [1] Schirrmeister, Robin Tibor, et al. "Deep learning with convolutional neural networks for EEG decoding and visualization." Human brain mapping 38.11 (2017): 5391-5420. - """ def __init__(self): diff --git a/moabb/datasets/sosulski2019.py b/moabb/datasets/sosulski2019.py index 812f96c83..291a9ab08 100644 --- a/moabb/datasets/sosulski2019.py +++ b/moabb/datasets/sosulski2019.py @@ -80,7 +80,6 @@ class Sosulski2019(BaseDataset): ----- .. versionadded:: 0.4.5 - """ def __init__( @@ -146,7 +145,7 @@ def _get_single_run_data(self, file_path): return raw def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" file_path_list = self.data_path(subject) sessions = {} diff --git a/moabb/datasets/ssvep_exo.py b/moabb/datasets/ssvep_exo.py index 7ab0a4907..3584e2331 100644 --- a/moabb/datasets/ssvep_exo.py +++ b/moabb/datasets/ssvep_exo.py @@ -1,6 +1,4 @@ -""" -SSVEP Exoskeleton dataset. -""" +"""SSVEP Exoskeleton dataset.""" from mne.io import Raw @@ -12,7 +10,7 @@ class SSVEPExo(BaseDataset): - """SSVEP Exo dataset + """SSVEP Exo dataset. .. 
admonition:: Dataset summary @@ -69,7 +67,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """Return the data of a single subject""" + """Return the data of a single subject.""" out = {} paths = self.data_path(subject, update_path=True, verbose=False) diff --git a/moabb/datasets/ssvep_mamem.py b/moabb/datasets/ssvep_mamem.py index f656dc32f..56af02ee3 100644 --- a/moabb/datasets/ssvep_mamem.py +++ b/moabb/datasets/ssvep_mamem.py @@ -1,6 +1,4 @@ -""" -SSVEP MAMEM1 dataset. -""" +"""SSVEP MAMEM1 dataset.""" import logging import os.path as osp @@ -37,9 +35,10 @@ def mamem_event(eeg, dins, labels=None): - """Convert DIN field into events + """Convert DIN field into events. - Code adapted from https://github.com/MAMEM/eeg-processing-toolbox + Code adapted from + https://github.com/MAMEM/eeg-processing-toolbox """ thres_split = 2000 timestamps = dins[1, :] @@ -82,7 +81,7 @@ def mamem_event(eeg, dins, labels=None): class BaseMAMEM(BaseDataset): - """Base class for MAMEM datasets""" + """Base class for MAMEM datasets.""" def __init__(self, events, sessions_per_subject, code, doi, figshare_id): super().__init__( @@ -97,7 +96,7 @@ def __init__(self, events, sessions_per_subject, code, doi, figshare_id): self.figshare_id = figshare_id def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" fnames = self.data_path(subject) filelist = fs_get_file_list(self.figshare_id) fsn = fs_get_file_name(filelist) @@ -167,7 +166,7 @@ def data_path( class MAMEM1(BaseMAMEM): - """SSVEP MAMEM 1 dataset + """SSVEP MAMEM 1 dataset. .. admonition:: Dataset summary @@ -287,7 +286,7 @@ def __init__(self): class MAMEM2(BaseMAMEM): - """SSVEP MAMEM 2 dataset + """SSVEP MAMEM 2 dataset. .. admonition:: Dataset summary @@ -380,7 +379,7 @@ def __init__(self): class MAMEM3(BaseMAMEM): - """SSVEP MAMEM 3 dataset + """SSVEP MAMEM 3 dataset. .. admonition:: Dataset summary diff --git a/moabb/datasets/ssvep_nakanishi.py b/moabb/datasets/ssvep_nakanishi.py index 2dd938157..342da58d9 100644 --- a/moabb/datasets/ssvep_nakanishi.py +++ b/moabb/datasets/ssvep_nakanishi.py @@ -1,6 +1,4 @@ -""" -SSVEP Nakanishi dataset. -""" +"""SSVEP Nakanishi dataset.""" import logging @@ -20,7 +18,7 @@ class Nakanishi2015(BaseDataset): - """SSVEP Nakanishi 2015 dataset + """SSVEP Nakanishi 2015 dataset. .. admonition:: Dataset summary @@ -70,7 +68,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """Return the data of a single subject""" + """Return the data of a single subject.""" n_samples, n_channels, n_trials = 1114, 8, 15 n_classes = len(self.event_id) diff --git a/moabb/datasets/ssvep_wang.py b/moabb/datasets/ssvep_wang.py index 8c25e4332..0dd05d210 100644 --- a/moabb/datasets/ssvep_wang.py +++ b/moabb/datasets/ssvep_wang.py @@ -1,6 +1,4 @@ -""" -SSVEP Wang dataset. -""" +"""SSVEP Wang dataset.""" import logging @@ -22,7 +20,7 @@ class Wang2016(BaseDataset): - """SSVEP Wang 2016 dataset + """SSVEP Wang 2016 dataset. .. 
admonition:: Dataset summary @@ -120,7 +118,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """Return the data of a single subject""" + """Return the data of a single subject.""" n_samples, n_channels, n_trials = 1500, 64, 6 n_classes = len(self.event_id) diff --git a/moabb/datasets/upper_limb.py b/moabb/datasets/upper_limb.py index e913e18c9..310ca76ca 100644 --- a/moabb/datasets/upper_limb.py +++ b/moabb/datasets/upper_limb.py @@ -62,7 +62,6 @@ class Ofner2017(BaseDataset): Upper limb movements can be decoded from the time-domain of low-frequency EEG. PloS one, 12(8), p.e0182578. https://doi.org/10.1371/journal.pone.0182578 - """ def __init__(self, imagined=True, executed=False): @@ -90,7 +89,7 @@ def __init__(self, imagined=True, executed=False): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" sessions = [] if self.imagined: diff --git a/moabb/datasets/utils.py b/moabb/datasets/utils.py index 60224b347..7262f5b2f 100644 --- a/moabb/datasets/utils.py +++ b/moabb/datasets/utils.py @@ -1,6 +1,4 @@ -""" -Utils for easy database selection -""" +"""Utils for easy database selection.""" import inspect @@ -29,8 +27,7 @@ def dataset_search( # noqa: C901 min_subjects=1, channels=(), ): - """ - Returns a list of datasets that match a given criteria + """Returns a list of datasets that match a given criteria. Parameters ---------- @@ -112,10 +109,8 @@ def dataset_search( # noqa: C901 def find_intersecting_channels(datasets, verbose=False): - """ - Given a list of dataset instances return a list of channels shared by all - datasets. - Skip datasets which have 0 overlap with the others + """Given a list of dataset instances return a list of channels shared by + all datasets. Skip datasets which have 0 overlap with the others. returns: set of common channels, list of datasets with valid channels """ diff --git a/moabb/evaluations/__init__.py b/moabb/evaluations/__init__.py index ec86b8e29..67b0780ee 100644 --- a/moabb/evaluations/__init__.py +++ b/moabb/evaluations/__init__.py @@ -1,9 +1,7 @@ -""" -An evaluation defines how we go from trials per subject and session to a +"""An evaluation defines how we go from trials per subject and session to a generalization statistic (AUC score, f-score, accuracy, etc) -- it can be either within-recording-session accuracy, across-session within-subject -accuracy, across-subject accuracy, or other transfer learning settings. -""" +accuracy, across-subject accuracy, or other transfer learning settings.""" # flake8: noqa from .evaluations import ( CrossSessionEvaluation, diff --git a/moabb/evaluations/base.py b/moabb/evaluations/base.py index cfb3ceb04..9767738a5 100644 --- a/moabb/evaluations/base.py +++ b/moabb/evaluations/base.py @@ -147,7 +147,6 @@ def process(self, pipelines, param_grid=None): ------- results: pd.DataFrame A dataframe containing the results. - """ # check pipelines @@ -211,5 +210,4 @@ def is_valid(self, dataset): ---------- dataset : dataset instance The dataset to verify. - """ diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index e8241e8c2..85048e0cb 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -9,8 +9,8 @@ def _check_if_is_keras_model(model): - """ - Check if the model is a Keras model + """Check if the model is a Keras model. 
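Editorial note: dataset_search() above filters the dataset catalogue by recording properties; only min_subjects and channels are visible in this hunk, so the other keyword names used below (paradigm, events) are assumptions to verify against its docstring:

from moabb.datasets.utils import dataset_search

datasets = dataset_search(
    paradigm="imagery",  # assumed values: "imagery", "p300", "ssvep"
    events=["left_hand", "right_hand"],
    min_subjects=5,
    channels=["C3", "Cz", "C4"],
)
print([type(d).__name__ for d in datasets])  # expected: list of matching dataset instances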
+ Parameters ---------- model: object @@ -30,8 +30,8 @@ def _check_if_is_keras_model(model): def _check_if_is_pytorch_model(model): - """ - Check if the model is a Keras model + """Check if the model is a Keras model. + Parameters ---------- model: object @@ -71,8 +71,8 @@ def _check_if_is_keras_steps(model): def save_model_cv(model: object, save_path: str | Path, cv_index: str | int): - """ - Save a model fitted to a folder + """Save a model fitted to a folder. + Parameters ---------- model: object @@ -86,7 +86,6 @@ def save_model_cv(model: object, save_path: str | Path, cv_index: str | int): Returns ------- - """ if save_path is None: raise IOError("No path to save the model") @@ -124,8 +123,8 @@ def save_model_cv(model: object, save_path: str | Path, cv_index: str | int): def save_model_list(model_list: list | Pipeline, score_list: Sequence, save_path: str): - """ - Save a list of models fitted to a folder + """Save a list of models fitted to a folder. + Parameters ---------- model_list: list | Pipeline @@ -163,8 +162,8 @@ def create_save_path( grid=False, eval_type="WithinSession", ): - """ - Create a save path based on evaluation parameters. + """Create a save path based on evaluation parameters. + Parameters ---------- hdf5_path : str diff --git a/moabb/paradigms/__init__.py b/moabb/paradigms/__init__.py index 047a59681..02fc0a52e 100644 --- a/moabb/paradigms/__init__.py +++ b/moabb/paradigms/__init__.py @@ -1,9 +1,9 @@ -""" -A paradigm defines how the raw data will be converted to trials ready to be -processed by a decoding algorithm. This is a function of the paradigm used, -i.e. in motor imagery one can have two-class, multi-class, or continuous -paradigms; similarly, different preprocessing is necessary for ERP vs ERD -paradigms. +"""A paradigm defines how the raw data will be converted to trials ready to be +processed by a decoding algorithm. + +This is a function of the paradigm used, i.e. in motor imagery one can +have two-class, multi-class, or continuous paradigms; similarly, +different preprocessing is necessary for ERP vs ERD paradigms. """ from moabb.paradigms.fixed_interval_windows import * from moabb.paradigms.motor_imagery import * diff --git a/moabb/paradigms/base.py b/moabb/paradigms/base.py index 3fdc1b4f7..4a10aa35d 100644 --- a/moabb/paradigms/base.py +++ b/moabb/paradigms/base.py @@ -29,6 +29,7 @@ class BaseProcessing(metaclass=abc.ABCMeta): Please use one of the child classes + Parameters ---------- @@ -83,7 +84,7 @@ def __init__( @property @abc.abstractmethod def datasets(self): - """Property that define the list of compatible datasets""" + """Property that define the list of compatible datasets.""" pass @abc.abstractmethod @@ -106,19 +107,17 @@ def is_valid(self, dataset): pass def prepare_process(self, dataset): - """Prepare processing of raw files - - This function allows to set parameter of the paradigm class prior to - the preprocessing (process_raw). Does nothing by default and could be - overloaded if needed. + """Prepare processing of raw files. - Parameters - ---------- + This function allows to set parameter of the paradigm class prior to + the preprocessing (process_raw). Does nothing by default and could be + overloaded if needed. - dataset : dataset instance - The dataset corresponding to the raw file. mainly use to access - dataset specific i - nformation. + Parameters + ---------- + dataset : dataset instance + The dataset corresponding to the raw file. mainly use to access + dataset specific information. 
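Editorial note: the paradigms package docstring above describes turning raw recordings into classifier-ready trials. A minimal sketch of that conversion using FakeDataset (no download); the paradigm and dataset keyword values mirror the test code touched later in this patch:

from moabb.datasets.fake import FakeDataset
from moabb.paradigms import LeftRightImagery

paradigm = LeftRightImagery(fmin=8, fmax=32)
dataset = FakeDataset(event_list=["left_hand", "right_hand"], paradigm="imagery")

X, y, metadata = paradigm.get_data(dataset, subjects=[1])
print(X.shape, y[:5])  # trials array and labels
print(metadata.head())  # one row per trial: subject, session, run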
""" if dataset is not None: pass @@ -146,7 +145,7 @@ def get_data( # noqa: C901 - session : the session indice - run : the run indice - parameters + Parameters ---------- dataset: A dataset instance. @@ -161,7 +160,7 @@ def get_data( # noqa: C901 cache_config: dict | CacheConfig Configuration for caching of datasets. See :class:`moabb.datasets.base.CacheConfig` for details. - returns + Eeturns ------- X : Union[np.ndarray, mne.Epochs] the data that will be used as features for the model diff --git a/moabb/paradigms/motor_imagery.py b/moabb/paradigms/motor_imagery.py index 05c7d0273..657a8e814 100644 --- a/moabb/paradigms/motor_imagery.py +++ b/moabb/paradigms/motor_imagery.py @@ -1,4 +1,4 @@ -"""Motor Imagery Paradigms""" +"""Motor Imagery Paradigms.""" import abc import logging @@ -147,7 +147,6 @@ class SinglePass(BaseMotorImagery): resample: float | None (default None) If not None, resample the eeg data with the sampling rate provided. - """ def __init__(self, fmin=8, fmax=32, **kwargs): @@ -164,15 +163,14 @@ def __init__( filters=([8, 12], [12, 16], [16, 20], [20, 24], [24, 28], [28, 32]), **kwargs, ): - """init""" + """init.""" super().__init__(filters=filters, **kwargs) class LeftRightImagery(SinglePass): - """Motor Imagery for left hand/right hand classification + """Motor Imagery for left hand/right hand classification. Metric is 'roc_auc' - """ def __init__(self, **kwargs): @@ -189,10 +187,9 @@ def scoring(self): class FilterBankLeftRightImagery(FilterBank): - """Filter Bank Motor Imagery for left hand/right hand classification + """Filter Bank Motor Imagery for left hand/right hand classification. Metric is 'roc_auc' - """ def __init__(self, **kwargs): @@ -209,8 +206,7 @@ def scoring(self): class FilterBankMotorImagery(FilterBank): - """ - Filter bank n-class motor imagery. + """Filter bank n-class motor imagery. Metric is 'roc-auc' if 2 classes and 'accuracy' if more @@ -293,8 +289,7 @@ def scoring(self): class MotorImagery(SinglePass): - """ - N-class motor imagery. + """N-class motor imagery. Metric is 'roc-auc' if 2 classes and 'accuracy' if more diff --git a/moabb/paradigms/p300.py b/moabb/paradigms/p300.py index 88f692fd3..6b4109130 100644 --- a/moabb/paradigms/p300.py +++ b/moabb/paradigms/p300.py @@ -1,4 +1,4 @@ -"""P300 Paradigms""" +"""P300 Paradigms.""" import logging @@ -106,7 +106,7 @@ def scoring(self): class SinglePass(BaseP300): - """Single Bandpass filter P300 + """Single Bandpass filter P300. P300 paradigm with only one bandpass filter (default 1 to 24 Hz) @@ -147,7 +147,6 @@ class SinglePass(BaseP300): resample: float | None (default None) If not None, resample the eeg data with the sampling rate provided. - """ def __init__(self, fmin=1, fmax=24, **kwargs): @@ -165,10 +164,9 @@ def fmin(self): class P300(SinglePass): - """P300 for Target/NonTarget classification + """P300 for Target/NonTarget classification. Metric is 'roc_auc' - """ def __init__(self, **kwargs): diff --git a/moabb/paradigms/resting_state.py b/moabb/paradigms/resting_state.py index 52eb40c31..c42317611 100644 --- a/moabb/paradigms/resting_state.py +++ b/moabb/paradigms/resting_state.py @@ -1,12 +1,12 @@ -"""Resting state Paradigms +"""Resting state Paradigms. -Regroups paradigms for experience where we record the EEG -and the participant is not doing an active task, such -as focusing, counting or speaking. +Regroups paradigms for experience where we record the EEG and the +participant is not doing an active task, such as focusing, counting or +speaking. 
-Typically, a open/close eye experiment, where we -record the EEG of a subject while he is having the eye open or close -is a resting state experiment. +Typically, a open/close eye experiment, where we record the EEG of a +subject while he is having the eye open or close is a resting state +experiment. """ from scipy.signal import welch @@ -16,6 +16,7 @@ class RestingStateToP300Adapter(SinglePass): """Adapter to the P300 paradigm for resting state experiments. + It implements a SinglePass processing as for P300, except that: - the name of the event is free (it is not enforced to Target/NonTarget as for P300) - the default values are different. In particular, the length of the epochs is larger. diff --git a/moabb/paradigms/ssvep.py b/moabb/paradigms/ssvep.py index fb5a5d378..a4b39dd03 100644 --- a/moabb/paradigms/ssvep.py +++ b/moabb/paradigms/ssvep.py @@ -1,4 +1,4 @@ -"""Steady-State Visually Evoked Potentials Paradigms""" +"""Steady-State Visually Evoked Potentials Paradigms.""" import logging @@ -11,7 +11,7 @@ class BaseSSVEP(BaseParadigm): - """Base SSVEP Paradigm + """Base SSVEP Paradigm. Parameters ---------- @@ -63,6 +63,8 @@ def __init__( channels=None, resample=None, ): + """Init the BaseSSVEP function.""" + super().__init__( filters=filters, events=events, @@ -85,6 +87,7 @@ def __init__( assert n_classes <= len(self.events), "More classes than events specified" def is_valid(self, dataset): + """Check if dataset is valid for the SSVEP paradigm.""" ret = True if not (dataset.paradigm == "ssvep"): ret = False @@ -97,6 +100,7 @@ def is_valid(self, dataset): return ret def used_events(self, dataset): + """Return the mne events used for the dataset.""" out = {} if self.events is None: for k, v in dataset.event_id.items(): @@ -119,6 +123,16 @@ def used_events(self, dataset): return out def prepare_process(self, dataset): + """Prepare dataset for processing, and using events if needed. + + This function is called before the processing function, and is used to + prepare the dataset for processing. This includes: + get the events used for the paradigm, and set the filters if needed. + Parameters + ---------- + dataset: moabb.datasets.base.BaseDataset + Dataset to prepare. + """ event_id = self.used_events(dataset) # get filters @@ -131,6 +145,7 @@ def prepare_process(self, dataset): @property def datasets(self): + """List of datasets valid for the paradigm.""" if self.tmax is None: interval = None else: @@ -145,6 +160,12 @@ def datasets(self): @property def scoring(self): + """Return the default scoring method for this paradigm. + + If n_classes use the roc_auc, else use accuracy. More details + about this default scoring method can be found in the original + moabb paper. + """ if self.n_classes == 2: return "roc_auc" else: @@ -152,7 +173,7 @@ def scoring(self): class SSVEP(BaseSSVEP): - """Single bandpass filter SSVEP + """Single bandpass filter SSVEP. SSVEP paradigm with only one bandpass filter (default 7 to 45 Hz) Metric is 'roc-auc' if 2 classes and 'accuracy' if more @@ -200,59 +221,52 @@ class SSVEP(BaseSSVEP): """ def __init__(self, fmin=7, fmax=45, **kwargs): + """Init function for the SSVEP.""" if "filters" in kwargs.keys(): raise (ValueError("SSVEP does not take argument filters")) super().__init__(filters=[(fmin, fmax)], **kwargs) class FilterBankSSVEP(BaseSSVEP): - """Filtered bank n-class SSVEP paradigm + """Filtered bank n-class SSVEP paradigm. SSVEP paradigm with multiple narrow bandpass filters, centered around the frequencies of considered events. 
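Editorial note: a sketch of how the narrow sub-bands of FilterBankSSVEP (introduced just above) can be centred on the stimulation frequencies, here assumed to be 13, 15 and 17 Hz with a +/- 0.5 Hz width:

from moabb.paradigms import FilterBankSSVEP

stim_events = ["13", "15", "17"]  # assumed stimulation frequencies, in Hz
filters = [(float(f) - 0.5, float(f) + 0.5) for f in stim_events]
paradigm = FilterBankSSVEP(filters=filters, events=stim_events, n_classes=3)
print(filters)  # [(12.5, 13.5), (14.5, 15.5), (16.5, 17.5)]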
Metric is 'roc-auc' if 2 classes and 'accuracy' if more. - Parameters - ----------- - + ---------- filters: list of list | None (default None) If None, bandpass set around freqs of events with [f_n-0.5, f_n+0.5] - events: List of str, List of stimulation frequencies. If None, use all stimulus found in the dataset. - n_classes: int or None (default 2) Number of classes each dataset must have. All dataset classes if None - tmin: float (default 0.0) Start time (in second) of the epoch, relative to the dataset specific task interval e.g. tmin = 1 would mean the epoch will start 1 second after the beginning of the task as defined by the dataset. - tmax: float | None, (default None) End time (in second) of the epoch, relative to the beginning of the dataset specific task interval. tmax = 5 would mean the epoch will end 5 second after the beginning of the task as defined in the dataset. If None, use the dataset value. - baseline: None | tuple of length 2 - The time interval to consider as “baseline†when applying baseline - correction. If None, do not apply baseline correction. - If a tuple (a, b), the interval is between a and b (in seconds), - including the endpoints. - Correction is applied by computing the mean of the baseline period - and subtracting it from the data (see mne.Epochs) - + The time interval to consider as “baseline†when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) channels: list of str | None (default None) List of channel to select. If None, use all EEG channels available in the dataset. - resample: float | None (default None) If not None, resample the eeg data with the sampling rate provided. """ def __init__(self, filters=None, **kwargs): + """Init in the FilterBankSSVEP paradigm.""" super().__init__(filters=filters, **kwargs) @@ -261,7 +275,9 @@ class FakeSSVEPParadigm(BaseSSVEP): @property def datasets(self): + """Return a fake dataset with event list 13 and 15.""" return [FakeDataset(event_list=["13", "15"], paradigm="ssvep")] def is_valid(self, dataset): + """Overwrite the original function, always True in FakeDataset.""" return dataset.paradigm == "ssvep" diff --git a/moabb/pipelines/__init__.py b/moabb/pipelines/__init__.py index aba5634c9..9df82ce5c 100644 --- a/moabb/pipelines/__init__.py +++ b/moabb/pipelines/__init__.py @@ -1,7 +1,7 @@ -""" -Pipeline defines all steps required by an algorithm to obtain predictions. -Pipelines are typically a chain of sklearn compatible transformers and end -with a sklearn compatible estimator. +"""Pipeline defines all steps required by an algorithm to obtain predictions. + +Pipelines are typically a chain of sklearn compatible transformers and +end with a sklearn compatible estimator. """ # flake8: noqa diff --git a/moabb/pipelines/classification.py b/moabb/pipelines/classification.py index 845efbfc1..7fd7048ea 100644 --- a/moabb/pipelines/classification.py +++ b/moabb/pipelines/classification.py @@ -12,7 +12,7 @@ class SSVEP_CCA(BaseEstimator, ClassifierMixin): - """Classifier based on Canonical Correlation Analysis for SSVEP + """Classifier based on Canonical Correlation Analysis for SSVEP. A CCA is computed from the set of training signals and some pure sinusoids to act as reference. 
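Editorial note: the SSVEP_CCA summary above (correlate each trial against pure sinusoidal references and pick the best-matching frequency) can be illustrated with a standalone sketch; this is not the SSVEP_CCA implementation itself, and the trial below is random data:

import numpy as np
from sklearn.cross_decomposition import CCA

def cca_score(trial, freq, sfreq, n_harmonics=3):
    """Canonical correlation between a (n_channels, n_times) trial and sin/cos references at freq Hz."""
    t = np.arange(trial.shape[1]) / sfreq
    ref = np.vstack(
        [np.sin(2 * np.pi * h * freq * t) for h in range(1, n_harmonics + 1)]
        + [np.cos(2 * np.pi * h * freq * t) for h in range(1, n_harmonics + 1)]
    )
    u, v = CCA(n_components=1).fit_transform(trial.T, ref.T)
    return np.corrcoef(u[:, 0], v[:, 0])[0, 1]

def predict_freq(trial, freqs, sfreq):
    """Predict the stimulation frequency with the highest canonical correlation."""
    return max(freqs, key=lambda f: cca_score(trial, f, sfreq))

rng = np.random.default_rng(0)
trial = rng.standard_normal((8, 512))  # fake 8-channel trial, 2 s at 256 Hz
print(predict_freq(trial, [13.0, 15.0, 17.0], sfreq=256.0))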
@@ -56,7 +56,7 @@ def __init__(self, interval, freqs, n_harmonics=3): self.one_hot[k] = i def fit(self, X, y, sample_weight=None): - """Compute reference sinusoid signal + """Compute reference sinusoid signal. These sinusoid are generated for each frequency in the dataset """ @@ -77,7 +77,7 @@ def fit(self, X, y, sample_weight=None): return self def predict(self, X): - """Predict is made by taking the maximum correlation coefficient""" + """Predict is made by taking the maximum correlation coefficient.""" y = [] for x in X: corr_f = {} @@ -89,7 +89,7 @@ def predict(self, X): return y def predict_proba(self, X): - """Probability could be computed from the correlation coefficient""" + """Probability could be computed from the correlation coefficient.""" P = np.zeros(shape=(len(X), len(self.freqs))) for i, x in enumerate(X): for j, f in enumerate(self.freqs): @@ -100,7 +100,8 @@ def predict_proba(self, X): class SSVEP_TRCA(BaseEstimator, ClassifierMixin): - """Classifier based on the Task-Related Component Analysis method [1]_ for SSVEP. + """Classifier based on the Task-Related Component Analysis method [1]_ for + SSVEP. Parameters ---------- @@ -325,7 +326,8 @@ def _compute_trca(self, X): return W[:, 0], W def fit(self, X, y): - """Extract spatial filters and templates from the given calibration data. + """Extract spatial filters and templates from the given calibration + data. Parameters ---------- @@ -534,7 +536,7 @@ def predict_proba(self, X): def _whitening(X): - """utility function to whiten EEG signal + """Utility function to whiten EEG signal. Parameters ---------- @@ -554,7 +556,7 @@ def _whitening(X): class SSVEP_MsetCCA(BaseEstimator, ClassifierMixin): - """Classifier based on MsetCCA for SSVEP + """Classifier based on MsetCCA for SSVEP. The MsetCCA method learns multiple linear transforms to extract SSVEP common features from multiple sets of EEG data. These are then used @@ -592,9 +594,8 @@ def __init__(self, freqs, n_filters=1, n_jobs=1): self.cca = CCA(n_components=1) def fit(self, X, y, sample_weight=None): - """ - Compute the optimized reference signal at each stimulus frequency - """ + """Compute the optimized reference signal at each stimulus + frequency.""" self.classes_ = np.unique(y) self.one_hot = {} for i, k in enumerate(self.classes_): @@ -638,7 +639,7 @@ def fit(self, X, y, sample_weight=None): return self def predict(self, X): - """Predict is made by taking the maximum correlation coefficient""" + """Predict is made by taking the maximum correlation coefficient.""" # Check is fit had been called check_is_fitted(self) @@ -653,7 +654,7 @@ def predict(self, X): return y def predict_proba(self, X): - """Probability could be computed from the correlation coefficient""" + """Probability could be computed from the correlation coefficient.""" # Check is fit had been called check_is_fitted(self) diff --git a/moabb/pipelines/csp.py b/moabb/pipelines/csp.py index eb79ae43a..f06af40dd 100644 --- a/moabb/pipelines/csp.py +++ b/moabb/pipelines/csp.py @@ -5,17 +5,17 @@ class TRCSP(CSP): - """ - Weighted Tikhonov-regularized CSP as described in Lotte and Guan 2011 - """ + """Weighted Tikhonov-regularized CSP as described in Lotte and Guan + 2011.""" def __init__(self, nfilter=4, metric="euclid", log=True, alpha=1): super().__init__(nfilter, metric, log) self.alpha = alpha def fit(self, X, y): - """ - Train spatial filters. Only deals with two class + """Train spatial filters. 
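Editorial note: the _whitening helper touched above is only described as "whiten EEG signal"; an illustrative ZCA-style whitening (the actual moabb implementation may differ) looks like:

import numpy as np

def whiten(X, eps=1e-12):
    """Whiten a (n_channels, n_times) signal: zero-mean, decorrelated, unit-variance channels."""
    X = X - X.mean(axis=1, keepdims=True)
    cov = X @ X.T / X.shape[1]
    eigvals, eigvecs = np.linalg.eigh(cov)
    W = eigvecs @ np.diag(1.0 / np.sqrt(eigvals + eps)) @ eigvecs.T  # ZCA whitening matrix
    return W @ X

rng = np.random.default_rng(0)
X_white = whiten(rng.standard_normal((8, 1000)))
print(np.round(np.cov(X_white)[:2, :2], 2))  # approximately the identity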
+ + Only deals with two class """ if not isinstance(X, (np.ndarray, list)): diff --git a/moabb/pipelines/deep_learning.py b/moabb/pipelines/deep_learning.py index ab1cee525..2094473a7 100644 --- a/moabb/pipelines/deep_learning.py +++ b/moabb/pipelines/deep_learning.py @@ -1,7 +1,5 @@ -""" -Deep learning integrated in MOABB -Implementation using the tensorflow, keras and scikeras framework. -""" +"""Deep learning integrated in MOABB Implementation using the tensorflow, keras +and scikeras framework.""" # Authors: Igor Carrara # Bruno Aristimunha @@ -42,18 +40,17 @@ # ShallowConvNet # ===================================================================================== def square(x): - """ - Function to square the input tensor element-wise. + """Function to square the input tensor element-wise. + Element-wise square. """ return K.square(x) def log(x): - """ - Function to take the log of the input tensor element-wise. - We use a clip to avoid taking the log of 0. - min_value=1e-7, max_value=10000 + """Function to take the log of the input tensor element-wise. We use a clip + to avoid taking the log of 0. min_value=1e-7, max_value=10000. + Parameters ---------- x: tensor @@ -66,7 +63,8 @@ def log(x): class KerasShallowConvNet(KerasClassifier): - """Keras implementation of the Shallow Convolutional Network as described in [1]_. + """Keras implementation of the Shallow Convolutional Network as described + in [1]_. This implementation is taken from code by the Army Research Laboratory (ARL) at https://github.com/vlawhern/arl-eegmodels @@ -150,7 +148,8 @@ def _keras_build_fn(self, compile_kwargs: Dict[str, Any]): # DeepConvNet # ================================================================================= class KerasDeepConvNet(KerasClassifier): - """Keras implementation of the Deep Convolutional Network as described in [1]_. + """Keras implementation of the Deep Convolutional Network as described in + [1]_. This implementation is taken from code by the Army Research Laboratory (ARL) at https://github.com/vlawhern/arl-eegmodels diff --git a/moabb/pipelines/features.py b/moabb/pipelines/features.py index 4bfa0a004..d8fc67d8c 100644 --- a/moabb/pipelines/features.py +++ b/moabb/pipelines/features.py @@ -7,30 +7,39 @@ class LogVariance(BaseEstimator, TransformerMixin): - """LogVariance transformer""" + """LogVariance transformer.""" def fit(self, X, y): """fit.""" return self def transform(self, X): - """transform""" + """transform.""" assert X.ndim == 3 return np.log(np.var(X, -1)) class FM(BaseEstimator, TransformerMixin): - """Transformer to scale sampling frequency""" + """Transformer to scale sampling frequency.""" def __init__(self, freq=128): - """Instantaneous frequencies require a sampling frequency to be properly - scaled, which is helpful for some algorithms. This assumes 128 if not told - otherwise. + """Init function for FM transformer. + + Instantaneous frequencies require a sampling frequency to be + properly scaled, which is helpful for some algorithms. + + This assumes 128 if not told otherwise. + + Parameters + ---------- + freq: int + Sampling frequency of the signal. This is used to scale + the instantaneous frequency. 
""" self.freq = freq def fit(self, X, y): - """fit.""" + """Only for scikit-learn compatibility.""" return self def transform(self, X): @@ -40,26 +49,28 @@ def transform(self, X): class ExtendedSSVEPSignal(BaseEstimator, TransformerMixin): - """Prepare FilterBank SSVEP EEG signal for estimating extended covariances + """Prepare FilterBank SSVEP EEG signal for estimating extended covariances. - Riemannian approaches on SSVEP rely on extended covariances matrices, where - the filtered signals are contenated to estimate a large covariance matrice. + Riemannian approaches on SSVEP rely on extended covariances + matrices, where the filtered signals are contenated to estimate a + large covariance matrice. - FilterBank SSVEP EEG are of shape (n_trials, n_channels, n_times, n_freqs) - and should be convert in (n_trials, n_channels*n_freqs, n_times) to - estimate covariance matrices of (n_channels*n_freqs, n_channels*n_freqs). + FilterBank SSVEP EEG are of shape (n_trials, n_channels, n_times, + n_freqs) and should be convert in (n_trials, n_channels*n_freqs, + n_times) to estimate covariance matrices of (n_channels*n_freqs, + n_channels*n_freqs). """ def __init__(self): - """Empty init for ExtendedSSVEPSignal""" + """Empty init for ExtendedSSVEPSignal.""" pass def fit(self, X, y): - """No need to fit for ExtendedSSVEPSignal""" + """No need to fit for ExtendedSSVEPSignal.""" return self def transform(self, X): - """Transpose and reshape EEG for extended covmat estimation""" + """Transpose and reshape EEG for extended covmat estimation.""" out = X.transpose((0, 3, 1, 2)) n_trials, n_freqs, n_channels, n_times = out.shape out = out.reshape((n_trials, n_channels * n_freqs, n_times)) @@ -67,8 +78,10 @@ def transform(self, X): class AugmentedDataset(BaseEstimator, TransformerMixin): - """This transformation allow to create an embedding version of the current dataset. - The implementation and the application is described in [1]_. + """Dataset augmentation methods in a higher dimensional space. + + This transformation allow to create an embedding version of the current + dataset. The implementation and the application is described in [1]_. References ---------- @@ -103,9 +116,7 @@ def transform(self, X: ndarray): class StandardScaler_Epoch(BaseEstimator, TransformerMixin): - """ - Function to standardize the X raw data for the DeepLearning Method - """ + """Function to standardize the X raw data for the DeepLearning Method.""" def __init__(self): """Init.""" @@ -125,9 +136,7 @@ def transform(self, X): class Resampler_Epoch(BaseEstimator, TransformerMixin): - """ - Function that copies and resamples an epochs object - """ + """Function that copies and resamples an epochs object.""" def __init__(self, sfreq): self.sfreq = sfreq @@ -142,9 +151,7 @@ def transform(self, X: mne.Epochs): class Convert_Epoch_Array(BaseEstimator, TransformerMixin): - """ - Function that copies and resamples an epochs object - """ + """Function that copies and resamples an epochs object.""" def __init__(self): """Init.""" diff --git a/moabb/pipelines/utils.py b/moabb/pipelines/utils.py index 4b15e782a..f836bb01a 100644 --- a/moabb/pipelines/utils.py +++ b/moabb/pipelines/utils.py @@ -33,7 +33,6 @@ def create_pipeline_from_config(config): ------- pipeline : Pipeline sklearn Pipeline - """ components = [] @@ -73,9 +72,9 @@ def create_pipeline_from_config(config): def parse_pipelines_from_directory(dir_path): - """ - Takes in the path to a directory with pipeline configuration files and returns a dictionary - of pipelines. 
+ """Takes in the path to a directory with pipeline configuration files and + returns a dictionary of pipelines. + Parameters ---------- dir_path: str @@ -136,10 +135,10 @@ def parse_pipelines_from_directory(dir_path): def generate_paradigms(pipeline_configs, context=None, logger=log): - """ - Takes in a dictionary of pipelines configurations as returned by - parse_pipelines_from_directory and returns a dictionary of unique paradigms with all pipeline - configurations compatible with that paradigm. + """Takes in a dictionary of pipelines configurations as returned by + parse_pipelines_from_directory and returns a dictionary of unique paradigms + with all pipeline configurations compatible with that paradigm. + Parameters ---------- pipeline_configs: @@ -154,7 +153,6 @@ def generate_paradigms(pipeline_configs, context=None, logger=log): paradigms: dict Dictionary of dictionaries with the unique paradigms and the configuration of the pipelines compatible with the paradigm - """ context = context or {} paradigms = OrderedDict() @@ -260,8 +258,8 @@ def __repr__(self): def filterbank(X, sfreq, idx_fb, peaks): - """ - Filter bank design for decomposing EEG data into sub-band components [1]_ + """Filter bank design for decomposing EEG data into sub-band components + [1]_ Parameters ---------- diff --git a/moabb/pipelines/utils_deep_model.py b/moabb/pipelines/utils_deep_model.py index 09e126bbe..fb4baf939 100644 --- a/moabb/pipelines/utils_deep_model.py +++ b/moabb/pipelines/utils_deep_model.py @@ -1,5 +1,5 @@ -""" -Utils for Deep learning integrated on MOABB. +"""Utils for Deep learning integrated on MOABB. + Implementation using the tensorflow, keras and scikeras framework. """ @@ -26,8 +26,7 @@ def EEGNet( data, input_layer, filters_1=8, kernel_size=64, depth=2, dropout=0.5, activation="elu" ): - """ - EEGNet block implementation as described in [1]_. + """EEGNet block implementation as described in [1]_. This implementation is taken from code by The Integrated Systems Laboratory of ETH Zurich at https://github.com/iis-eth-zurich/eeg-tcnet diff --git a/moabb/pipelines/utils_pytorch.py b/moabb/pipelines/utils_pytorch.py index 2ab3dfed9..b8e896482 100644 --- a/moabb/pipelines/utils_pytorch.py +++ b/moabb/pipelines/utils_pytorch.py @@ -13,16 +13,15 @@ # check if the data format is numpy or mne epoch def _check_data_format(X): - """ - Check if the data format is compatible with braindecode. - Expect values in the format of MNE objects. + """Check if the data format is compatible with braindecode. Expect values + in the format of MNE objects. 
+ Parameters ---------- X: BaseConcatDataset Returns ------- - """ if not isinstance(X, mne.EpochsArray): raise ValueError( @@ -33,9 +32,8 @@ def _check_data_format(X): class BraindecodeDatasetLoader(BaseEstimator, TransformerMixin): - """ - Class to Load the data from MOABB in a format compatible with braindecode - """ + """Class to Load the data from MOABB in a format compatible with + braindecode.""" def __init__(self, drop_last_window=False, kw_args=None): self.drop_last_window = drop_last_window @@ -68,7 +66,7 @@ def __sklearn_is_fitted__(self): def get_shape_from_baseconcat(X, param_name): - """Get the shape of the data after BaseConcatDataset is applied""" + """Get the shape of the data after BaseConcatDataset is applied.""" if isinstance(X, BaseConcatDataset): in_channel = X[0][0].shape[0] input_window_samples = X[0][0].shape[1] diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py index 016a7cc04..cb4bc53fd 100644 --- a/moabb/tests/datasets.py +++ b/moabb/tests/datasets.py @@ -32,7 +32,7 @@ def _run_tests_on_dataset(d): class Test_Datasets(unittest.TestCase): def test_fake_dataset(self): - """this test will insure the basedataset works""" + """This test will insure the basedataset works.""" n_subjects = 3 n_sessions = 2 n_runs = 2 @@ -140,7 +140,7 @@ def test_cache_dataset(self): shutil.rmtree(tempdir) def test_dataset_accept(self): - """verify that accept licence is working""" + """Verify that accept licence is working.""" # Only Shin2017 (bbci_eeg_fnirs) for now for ds in [Shin2017A(), Shin2017B()]: # if the data is already downloaded: @@ -192,7 +192,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def test_fake_dataset(self): - """this test will insure the basedataset works""" + """This test will insure the basedataset works.""" param_list = [(None, None), ("session_0", "run_0"), (["session_0"], ["run_0"])] for sessions, runs in param_list: with self.subTest(): diff --git a/moabb/tests/download.py b/moabb/tests/download.py index 2f5323ad9..e9dd33731 100644 --- a/moabb/tests/download.py +++ b/moabb/tests/download.py @@ -1,6 +1,4 @@ -""" -Tests to ensure that datasets download correctly -""" +"""Tests to ensure that datasets download correctly.""" import unittest import mne diff --git a/moabb/tests/evaluations.py b/moabb/tests/evaluations.py index 22411e9ad..31bc328cd 100644 --- a/moabb/tests/evaluations.py +++ b/moabb/tests/evaluations.py @@ -45,10 +45,11 @@ def __init__(self, kernel): class Test_WithinSess(unittest.TestCase): """This is actually integration testing but I don't know how to do this - better. A paradigm implements pre-processing so it needs files to run MNE + better. + + A paradigm implements pre-processing so it needs files to run MNE stuff on. To test the scoring and train/test we need to also have data and run it. Putting this on the future docket... - """ def setUp(self): @@ -146,12 +147,12 @@ def explicit_kernel(x): class Test_WithinSessLearningCurve(unittest.TestCase): - """ - Some tests for the learning curve evaluation. + """Some tests for the learning curve evaluation. - TODO if we ever extend dataset metadata, e.g. including y for example, we could get rid of a - lot of issues regarding valid inputs for policy per_class as this could be determined at - Evaluation initialization instead of during running the evaluation + TODO if we ever extend dataset metadata, e.g. 
including y for + example, we could get rid of a lot of issues regarding valid inputs + for policy per_class as this could be determined at Evaluation + initialization instead of during running the evaluation """ def test_correct_results_integrity(self): diff --git a/moabb/tests/paradigms.py b/moabb/tests/paradigms.py index 0d31b8f54..57ab15e0f 100644 --- a/moabb/tests/paradigms.py +++ b/moabb/tests/paradigms.py @@ -72,7 +72,7 @@ def test_BaseImagery_paradigm(self): ) def test_BaseImagery_channel_order(self): - """test if paradigm return correct channel order, see issue #227""" + """Test if paradigm return correct channel order, see issue #227.""" datasetA = FakeDataset(paradigm="imagery", channels=["C3", "Cz", "C4"]) datasetB = FakeDataset(paradigm="imagery", channels=["Cz", "C4", "C3"]) paradigm = SimpleMotorImagery(channels=["C4", "C3", "Cz"]) @@ -376,7 +376,7 @@ def test_BaseP300_paradigm(self): ) def test_BaseP300_channel_order(self): - """test if paradigm return correct channel order, see issue #227""" + """Test if paradigm return correct channel order, see issue #227.""" datasetA = FakeDataset( paradigm="p300", channels=["C3", "Cz", "C4"], @@ -574,7 +574,7 @@ def test_BaseSSVEP_paradigm(self): ) def test_BaseSSVEP_channel_order(self): - """test if paradigm return correct channel order, see issue #227""" + """Test if paradigm return correct channel order, see issue #227.""" datasetA = FakeDataset(paradigm="ssvep", channels=["C3", "Cz", "C4"]) datasetB = FakeDataset(paradigm="ssvep", channels=["Cz", "C4", "C3"]) paradigm = BaseSSVEP(channels=["C4", "C3", "Cz"]) diff --git a/moabb/tests/util_braindecode.py b/moabb/tests/util_braindecode.py index 2d4809708..29032daf2 100644 --- a/moabb/tests/util_braindecode.py +++ b/moabb/tests/util_braindecode.py @@ -13,7 +13,7 @@ @pytest.fixture(scope="module") def data(): - """Return EEG data from dataset to test transformer""" + """Return EEG data from dataset to test transformer.""" paradigm = SimpleMotorImagery() dataset = FakeDataset(paradigm="imagery") X, labels, metadata = paradigm.get_data(dataset, subjects=[1], return_epochs=True) @@ -37,20 +37,22 @@ def test_sklearn_is_fitted(self, data): assert transformer.__sklearn_is_fitted__() def test_transformer_fit(self, data): - """Test whether transformer can fit to some training data""" + """Test whether transformer can fit to some training data.""" X_train, y_train, _, _ = data transformer = BraindecodeDatasetLoader() assert transformer.fit(X_train, y_train) == transformer def test_transformer_transform_returns_dataset(self, data): - """Test whether the output of the transform method is a BaseConcatDataset""" + """Test whether the output of the transform method is a + BaseConcatDataset.""" X_train, y_train, _, _ = data transformer = BraindecodeDatasetLoader() dataset = transformer.fit(X_train, y_train).transform(X_train, y_train) assert isinstance(dataset, BaseConcatDataset) def test_transformer_transform_contents(self, data): - """Test whether the contents and metadata of a transformed dataset are correct""" + """Test whether the contents and metadata of a transformed dataset are + correct.""" X_train, y_train, _, _ = data transformer = BraindecodeDatasetLoader() dataset = transformer.fit(X_train, y_train).transform(X_train, y_train) @@ -62,7 +64,7 @@ def test_transformer_transform_contents(self, data): assert sample_epoch[1] == y_train[0] def test_sfreq_passed_through(self, data): - """Test if the sfreq parameter makes it through the transformer""" + """Test if the sfreq parameter makes it 
through the transformer.""" sfreq = 128.0 info = create_info(ch_names=["test"], sfreq=sfreq, ch_types=["eeg"]) data = np.random.normal(size=(2, 1, 10 * int(sfreq))) * 1e-6 @@ -74,19 +76,19 @@ def test_sfreq_passed_through(self, data): assert dataset.datasets[0].windows.info["sfreq"] == sfreq def test_kw_args_initialization(self): - """Test initializing the transformer with kw_args""" + """Test initializing the transformer with kw_args.""" kw_args = {"sampling_rate": 128} transformer = BraindecodeDatasetLoader(kw_args=kw_args) assert transformer.kw_args == kw_args def test_is_fitted_method(self): - """Test __sklearn_is_fitted__ returns True""" + """Test __sklearn_is_fitted__ returns True.""" transformer = BraindecodeDatasetLoader() is_fitter = transformer.__sklearn_is_fitted__() assert is_fitter def test_assert_raises_value_error(self, data): - """Test that an invalid argument gives a ValueError""" + """Test that an invalid argument gives a ValueError.""" X_train, y_train, _, _ = data transformer = BraindecodeDatasetLoader() invalid_param_name = "invalid" @@ -94,7 +96,7 @@ def test_assert_raises_value_error(self, data): transformer.fit(X_train, y=y_train, **{invalid_param_name: None}) def test_type_create_from_X_y_vs_transfomer(self, data): - """Test the type from create_from_X_y() and the transformer""" + """Test the type from create_from_X_y() and the transformer.""" X_train, y_train, _, _ = data dataset = create_from_X_y( @@ -111,13 +113,13 @@ def test_type_create_from_X_y_vs_transfomer(self, data): assert type(dataset_trans) == type(dataset) def test_wrong_input(self): - """Test that an invalid input raises a ValueError""" + """Test that an invalid input raises a ValueError.""" transformer = BraindecodeDatasetLoader() with pytest.raises(ValueError): transformer.fit_transform(np.random.normal(size=(2, 1, 10)), y=np.array([0])) def test_transformer_transform_with_custom_y(self, data): - """Test whether the provided y is used during transform""" + """Test whether the provided y is used during transform.""" X_train, y_train, _, _ = data transformer = BraindecodeDatasetLoader() @@ -137,7 +139,8 @@ def test_transformer_transform_with_custom_y(self, data): assert np.array_equal(dataset_test[1][1], y_test[1]) def test_transformer_transform_with_default_y(self, data): - """Test whether self.y is used when y is not provided during transform""" + """Test whether self.y is used when y is not provided during + transform.""" X_train, y_train, _, _ = data transformer = BraindecodeDatasetLoader() diff --git a/moabb/utils.py b/moabb/utils.py index 195ae34b0..addc0aab8 100644 --- a/moabb/utils.py +++ b/moabb/utils.py @@ -1,3 +1,4 @@ +"""Util functions for moabb.""" import logging import os import os.path as osp @@ -9,8 +10,8 @@ def _set_random_seed(seed: int) -> None: - """ - Set the seed for Python's built-in random module and numpy. + """Set the seed for Python's built-in random module and numpy. + Parameters ---------- seed: int @@ -24,8 +25,8 @@ def _set_random_seed(seed: int) -> None: def _set_tensorflow_seed(seed: int) -> None: - """ - Set the seed for TensorFlow. + """Set the seed for TensorFlow. + Parameters ---------- seed: int @@ -53,8 +54,8 @@ def _set_tensorflow_seed(seed: int) -> None: def _set_torch_seed(seed: int) -> None: - """ - Set the seed for PyTorch. + """Set the seed for PyTorch. + Parameters ---------- seed: int @@ -80,8 +81,8 @@ def _set_torch_seed(seed: int) -> None: def setup_seed(seed: int) -> None: - """ - Set the seed for random, numpy, TensorFlow and PyTorch. 
+ """Set the seed for random, numpy, TensorFlow and PyTorch. + Parameters ---------- seed: int @@ -102,11 +103,10 @@ def setup_seed(seed: int) -> None: def set_log_level(level="INFO"): - """Set log level + """Set log level. - Set the general log level. - Use one of the levels supported by python logging, i.e.: - DEBUG, INFO, WARNING, ERROR, CRITICAL + Set the general log level. Use one of the levels supported by python + logging, i.e.: DEBUG, INFO, WARNING, ERROR, CRITICAL """ VALID_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] level = level.upper() @@ -120,7 +120,7 @@ def set_log_level(level="INFO"): def set_download_dir(path): - """Set the download directory if required to change from default mne path + """Set the download directory if required to change from default mne path. Parameters ---------- @@ -129,7 +129,6 @@ def set_download_dir(path): path is created If None, and MNE_DATA config does not exist, a warning is raised and the storage location is set to the MNE default directory - """ if path is None: if get_config("MNE_DATA") is None: diff --git a/scripts/generating_metainfo.py b/scripts/generating_metainfo.py index 5cbf44765..6ff18cba4 100644 --- a/scripts/generating_metainfo.py +++ b/scripts/generating_metainfo.py @@ -40,9 +40,8 @@ def parser_init(): def process_trial_freq(trials_per_events, prdgm): - """ - Function to process the trial frequency. - Getting the median value if the paradigm is MotorImagery. + """Function to process the trial frequency. Getting the median value if the + paradigm is MotorImagery. Parameters ---------- @@ -64,8 +63,7 @@ def process_trial_freq(trials_per_events, prdgm): def get_meta_info(dataset, dataset_name, paradigm, prdgm_name): - """ - Function to get the meta-information of a dataset. + """Function to get the meta-information of a dataset. Parameters ---------- @@ -80,7 +78,6 @@ def get_meta_info(dataset, dataset_name, paradigm, prdgm_name): Returns ------- - """ subjects = len(dataset.subject_list) session = dataset.n_sessions diff --git a/tutorials/tutorial_3_benchmarking_multiple_pipelines.py b/tutorials/tutorial_3_benchmarking_multiple_pipelines.py index 6f902e9b3..050403649 100644 --- a/tutorials/tutorial_3_benchmarking_multiple_pipelines.py +++ b/tutorials/tutorial_3_benchmarking_multiple_pipelines.py @@ -1,4 +1,4 @@ -""" +""" Basic tutorial on how to use MOABB. =========================================== Tutorial 3: Benchmarking multiple pipelines =========================================== diff --git a/tutorials/tutorial_4_adding_a_dataset.py b/tutorials/tutorial_4_adding_a_dataset.py index 31b4da29c..be938006d 100644 --- a/tutorials/tutorial_4_adding_a_dataset.py +++ b/tutorials/tutorial_4_adding_a_dataset.py @@ -33,7 +33,7 @@ def create_example_dataset(): - """Create a fake example for a dataset""" + """Create a fake example for a dataset.""" sfreq = 256 t_recording = 150 t_trial = 1 # duration of a trial @@ -96,10 +96,10 @@ def create_example_dataset(): class ExampleDataset(BaseDataset): - """ - Dataset used to exemplify the creation of a dataset class in MOABB. - The data samples have been simulated and has no physiological meaning - whatsoever. + """Dataset used to exemplify the creation of a dataset class in MOABB. + + The data samples have been simulated and has no physiological + meaning whatsoever. 
""" def __init__(self): @@ -114,7 +114,7 @@ def __init__(self): ) def _get_single_subject_data(self, subject): - """return data for a single subject""" + """Return data for a single subject.""" file_path_list = self.data_path(subject) data = loadmat(file_path_list[0]) @@ -133,7 +133,7 @@ def _get_single_subject_data(self, subject): def data_path( self, subject, path=None, force_update=False, update_path=None, verbose=None ): - """Download the data from one subject""" + """Download the data from one subject.""" if subject not in self.subject_list: raise (ValueError("Invalid subject number")) From 0757a3bfe463a1825d5470d6d2ef3c345b9bcf5c Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Tue, 1 Aug 2023 23:06:51 +0200 Subject: [PATCH 29/64] Add details in docstrings in bids_interface.py (#442) --- moabb/datasets/bids_interface.py | 38 ++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/moabb/datasets/bids_interface.py b/moabb/datasets/bids_interface.py index 55f2c98dc..f95f393df 100644 --- a/moabb/datasets/bids_interface.py +++ b/moabb/datasets/bids_interface.py @@ -143,7 +143,7 @@ def lock_file(self): ) def erase(self): - """Erase the cache of the subject.""" + """Erase the cache of the subject if it exists.""" log.info("Starting erasing cache of %s...", repr(self)) path = mne_bids.BIDSPath( root=self.root, @@ -155,7 +155,15 @@ def erase(self): log.info("Finished erasing cache of %s.", repr(self)) def load(self, preload=False): - """Load the cache of the subject.""" + """Load the cache of the subject if it exists and returns it as + a nested dictionary with the following structure:: + + sessions_data = {'session_id': + {'run_id': run} + } + + If the cache is not present, returns None. + """ log.info("Attempting to retrieve cache of %s...", repr(self)) self.lock_file.mkdir(exist_ok=True) if not self.lock_file.fpath.exists(): @@ -180,7 +188,18 @@ def load(self, preload=False): return sessions_data def save(self, sessions_data): - """Save the cache of the subject.""" + """Save the cache of the subject. + The data to be saved should be a nested dictionary + with the following structure:: + + sessions_data = {'session_id': + {'run_id': run} + } + + If a ``run`` is None, it will be skipped. + + The type of the ``run`` object can vary (see the subclases). + """ log.info("Starting caching %s", {repr(self)}) mne_bids.BIDSPath(root=self.root).mkdir(exist_ok=True) mne_bids.make_dataset_description( @@ -261,7 +280,10 @@ def _datatype(self): class BIDSInterfaceRawEDF(BIDSInterfaceBase): - """BIDS Interface for Raw EDF files. Selected .edf type only.""" + """BIDS Interface for Raw EDF files. Selected .edf type only. + + In this case, the ``run`` object (see the ``save()`` method) + is expected to be an ``mne.io.BaseRaw`` instance.""" @property def _extension(self): @@ -330,6 +352,10 @@ class BIDSInterfaceEpochs(BIDSInterfaceBase): """This interface is used to cache mne-epochs to disk. Pseudo-BIDS format is used to store the data. + + + In this case, the ``run`` object (see the ``save()`` method) + is expected to be an ``mne.Epochs`` instance. """ @property @@ -358,6 +384,10 @@ class BIDSInterfaceNumpyArray(BIDSInterfaceBase): """This interface is used to cache numpy arrays to disk. MOABB Pseudo-BIDS format is used to store the data. + + In this case, the ``run`` object (see the ``save()`` method) + is expected to be an ``OrderedDict`` with keys ``"X"`` and + ``"events"``. 
Both values are expected to be ``numpy.ndarray``. """ @property From 01e289933507a064883019e04f07c746cefdf741 Mon Sep 17 00:00:00 2001 From: Thomas Moreau Date: Fri, 4 Aug 2023 10:56:23 +0200 Subject: [PATCH 30/64] FIX empty dataset_list in python3.10 (#449) * FIX empty dataset_list in python3.10 * [pre-commit.ci] auto fixes from pre-commit.com hooks * DOC add what's new entry - FIX comment position due to pre-commit messing with the files --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/source/whats_new.rst | 2 ++ moabb/datasets/__init__.py | 6 ++++++ moabb/datasets/utils.py | 3 --- moabb/tests/datasets.py | 15 ++++++++++++++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index f7fcb0cf5..060b1eb22 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -59,6 +59,7 @@ Bugs - Fix :func:`moabb.benchmark` overwriting ``include_datasets`` list (:gh:`408` by `Pierre Guetschel`_) - Fix :func:`moabb.paradigms.base.BaseParadigm` using attributes before defining them (PR :gh:`408`, issue :gh:`425` by `Pierre Guetschel`_) - Fix :func:`moabb.paradigms.FakeImageryParadigm`, :func:`moabb.paradigms.FakeP300Paradigm` and :func:`moabb.paradigms.FakeSSVEPParadigm` ``is_valid`` methods to only accept the correct datasets (PR :gh:`408` by `Pierre Guetschel`_) +- Fix ``dataset_list`` construction, which could be empty due to bad import order (PR :gh:`449` by `Thomas Moreau`_). API changes ~~~~~~~~~~~ @@ -368,3 +369,4 @@ API changes .. _Jan Sosulski: https://github.com/jsosulski .. _Pierre Guetschel: https://github.com/PierreGtch .. _Ludovic Darmet: https://github.com/ludovicdmt +.. _Thomas Moreau: https://github.com/tommoral diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index e85bb27e9..6f42e3e16 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -46,5 +46,11 @@ from .ssvep_nakanishi import Nakanishi2015 from .ssvep_wang import Wang2016 from .upper_limb import Ofner2017 +from .utils import _init_dataset_list from .Weibo2014 import Weibo2014 from .Zhou2016 import Zhou2016 + + +# Call this last in order to make sure the dataset list contains all +# the datasets imported in this file. 
+_init_dataset_list() diff --git a/moabb/datasets/utils.py b/moabb/datasets/utils.py index 7262f5b2f..6bafff7ce 100644 --- a/moabb/datasets/utils.py +++ b/moabb/datasets/utils.py @@ -15,9 +15,6 @@ def _init_dataset_list(): dataset_list.append(ds[1]) -_init_dataset_list() - - def dataset_search( # noqa: C901 paradigm=None, multi_session=False, diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py index cb4bc53fd..0be586e9c 100644 --- a/moabb/tests/datasets.py +++ b/moabb/tests/datasets.py @@ -1,13 +1,16 @@ +import inspect import shutil import tempfile import unittest import mne +from moabb import datasets as db from moabb.datasets import Shin2017A, Shin2017B, VirtualReality +from moabb.datasets.base import BaseDataset from moabb.datasets.compound_dataset import CompoundDataset from moabb.datasets.fake import FakeDataset, FakeVirtualRealityDataset -from moabb.datasets.utils import block_rep +from moabb.datasets.utils import block_rep, dataset_list from moabb.paradigms import P300 @@ -147,6 +150,16 @@ def test_dataset_accept(self): if mne.get_config("MNE_DATASETS_BBCIFNIRS_PATH") is None: self.assertRaises(AttributeError, ds.get_data, [1]) + def test_dataset_list(self): + all_datasets = len( + [ + issubclass(c, BaseDataset) + for c in db.__dict__.values() + if inspect.isclass(c) + ] + ) + assert len(dataset_list) == all_datasets + class Test_VirtualReality_Dataset(unittest.TestCase): def __init__(self, *args, **kwargs): From 3dc35244c60b32f017cda476f9dda304b4f2981f Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Wed, 9 Aug 2023 02:10:30 +0200 Subject: [PATCH 31/64] Small changes to tutorials (#456) * Fix comment fixed_interval example * Move references to the end of BIDS tutorial --- examples/plot_bids_conversion.py | 40 ++++++++++++------------- examples/plot_fixed_interval_windows.py | 8 ++--- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/examples/plot_bids_conversion.py b/examples/plot_bids_conversion.py index dce4deb0b..1decafdb8 100644 --- a/examples/plot_bids_conversion.py +++ b/examples/plot_bids_conversion.py @@ -12,7 +12,7 @@ BIDS [1]_ and [2]_. In this example, we will convert the AlexMI dataset to BIDS using the -option `cache_config=dict(path=temp_dir, save_raw=True)` of the get_data +option ``cache_config=dict(path=temp_dir, save_raw=True)`` of the ``get_data`` method from the dataset object. This will automatically save the raw data in the BIDS format and allow to use @@ -20,25 +20,6 @@ We will use the AlexMI dataset [3]_, one of the smallest in people and one that can be downloaded quickly. - -References ------------ - -.. [1] Pernet, C.R., Appelhoff, S., Gorgolewski, K.J. et al. EEG-BIDS, - An extension to the brain imaging data structure for - electroencephalography. Sci Data 6, 103 (2019). - https://doi.org/10.1038/s41597-019-0104-8 - -.. [2] Appelhoff et al., (2019). MNE-BIDS: Organizing electrophysiological - data into the BIDS format and facilitating their analysis. - Journal of Open Source Software, 4(44), 1896, - https://doi.org/10.21105/joss.01896 - -.. [3] Barachant, A., 2012. Commande robuste d'un effecteur par une - interface cerveau machine EEG asynchrone (Doctoral dissertation, - Université de Grenoble). 
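# A sketch mirroring the test_dataset_list check added above: after this fix,
# the registry built by _init_dataset_list() should be populated on every
# supported Python version and contain only BaseDataset subclasses.
import inspect

from moabb.datasets.base import BaseDataset
from moabb.datasets.utils import dataset_list

assert len(dataset_list) > 0
assert all(inspect.isclass(d) and issubclass(d, BaseDataset) for d in dataset_list)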
- https://tel.archives-ouvertes.fr/tel-01196752 - """ # Authors: Pierre Guetschel # @@ -131,3 +112,22 @@ def print_tree(p: Path, last=True, header=""): # # Finally, we can delete the temporary folder: shutil.rmtree(temp_dir) + +############################################################################### +# References +# ----------- +# +# .. [1] Pernet, C.R., Appelhoff, S., Gorgolewski, K.J. et al. EEG-BIDS, +# An extension to the brain imaging data structure for +# electroencephalography. Sci Data 6, 103 (2019). +# https://doi.org/10.1038/s41597-019-0104-8 +# +# .. [2] Appelhoff et al., (2019). MNE-BIDS: Organizing electrophysiological +# data into the BIDS format and facilitating their analysis. +# Journal of Open Source Software, 4(44), 1896, +# https://doi.org/10.21105/joss.01896 +# +# .. [3] Barachant, A., 2012. Commande robuste d'un effecteur par une +# interface cerveau machine EEG asynchrone (Doctoral dissertation, +# Université de Grenoble). +# https://tel.archives-ouvertes.fr/tel-01196752 diff --git a/examples/plot_fixed_interval_windows.py b/examples/plot_fixed_interval_windows.py index 85e7723e2..38f61d1ae 100644 --- a/examples/plot_fixed_interval_windows.py +++ b/examples/plot_fixed_interval_windows.py @@ -54,7 +54,7 @@ length=100, stride=50, start_offset=300, - stop_offset=900, # we epoch 10 minutes per run, starting at 200 seconds + stop_offset=900, # we epoch 10 minutes per run, starting at 5 minutes (i.e. 300 seconds) # parameters common with other paradigms: resample=100, fmin=7, @@ -71,10 +71,10 @@ print(f"{column}s: {metadata[column].unique()}") ############################################################################### -# We expect to obtained ``(stop_offset - start_offset - length) / stride`` -# = (900-300-100)/50 = 10 epochs per run. Here we have 3*2=6 runs. +# We expect to obtained ``(stop_offset - start_offset - length) / stride``; +# i.e. :math:`(900-300-100)/50=10` epochs per run. Here we have 3*2=6 runs. 
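# A quick numeric restatement of the formula above, using the parameter values
# passed to the paradigm in this example (a sketch for clarity only):
assert (900 - 300 - 100) // 50 * 6 == 60  # 10 epochs per run x 6 runs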
# And indeed, we obtain -# a total of 6*10=60 epochs: +# a total of :math:`6*10=60` epochs: print(f"Number of epochs: {len(X)}") ############################################################################### From 5bab7d618aedff5b2c1769b270a91d0774159b73 Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Wed, 9 Aug 2023 14:30:32 +0200 Subject: [PATCH 32/64] Dataset codes (#448) * Check if dataset code is camel case * Update BIDS cache location * Update FakeDataset code * Add test init datasets * Remove spaces from dataset codes * Remove tailing "dataset" from dataset codes * Standardize tailing paradigm in codes with separating dash * Relax dataset name constraint * Update fake dataset code * Update benchmark test * Update whats_new.rst * Fix hardcoded dataset codes * Fix dataset name in tutorial_4_adding_a_dataset.py * Fix dataset codes in examples * Small fix --------- Co-authored-by: Bru --- docs/source/whats_new.rst | 1 + examples/plot_benchmark.py | 2 +- examples/plot_benchmark_grid_search.py | 2 +- moabb/datasets/Lee2019.py | 2 +- moabb/datasets/Weibo2014.py | 2 +- moabb/datasets/Zhou2016.py | 2 +- moabb/datasets/alex_mi.py | 2 +- moabb/datasets/base.py | 15 ++++- moabb/datasets/bids_interface.py | 12 +++- moabb/datasets/braininvaders.py | 70 ++++++++++++------------ moabb/datasets/epfl.py | 2 +- moabb/datasets/fake.py | 11 ++-- moabb/datasets/huebner_llp.py | 4 +- moabb/datasets/mpi_mi.py | 2 +- moabb/datasets/neiry.py | 2 +- moabb/datasets/physionet_mi.py | 2 +- moabb/datasets/sosulski2019.py | 2 +- moabb/datasets/ssvep_exo.py | 2 +- moabb/datasets/ssvep_mamem.py | 20 ++++--- moabb/datasets/ssvep_nakanishi.py | 2 +- moabb/datasets/ssvep_wang.py | 5 +- moabb/tests/benchmark.py | 9 ++- moabb/tests/datasets.py | 9 ++- tutorials/tutorial_4_adding_a_dataset.py | 4 +- 24 files changed, 112 insertions(+), 74 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 060b1eb22..1be4bcfe1 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -39,6 +39,7 @@ Enhancements - Systematically set the annotations when loading data, eventually using the stim channel (PR :gh:`408` by `Pierre Guetschel`_) - Allow :func:`moabb.datasets.utils.dataset_search` to search across paradigms ``paradigm=None`` (PR :gh:`408` by `Pierre Guetschel`_) - Improving the review processing with more pre-commit bots (:gh:`435` by `Bruno Aristimunha`_) +- Update all dataset codes to remove white spaces and underscores (:gh:`448` by `Pierre Guetschel`_) Bugs ~~~~ diff --git a/examples/plot_benchmark.py b/examples/plot_benchmark.py index eea01867e..9430b174f 100644 --- a/examples/plot_benchmark.py +++ b/examples/plot_benchmark.py @@ -85,7 +85,7 @@ pipelines="./sample_pipelines/", evaluations=["WithinSession"], paradigms=["LeftRightImagery"], - include_datasets=["Zhou 2016"], + include_datasets=["Zhou2016"], results="./results/", overwrite=False, plot=False, diff --git a/examples/plot_benchmark_grid_search.py b/examples/plot_benchmark_grid_search.py index 6d13537dd..7883592ef 100644 --- a/examples/plot_benchmark_grid_search.py +++ b/examples/plot_benchmark_grid_search.py @@ -50,7 +50,7 @@ pipelines="./pipelines_grid/", evaluations=["WithinSession"], paradigms=["LeftRightImagery"], - include_datasets=["Zhou 2016"], + include_datasets=["Zhou2016"], results="./results/", overwrite=False, plot=False, diff --git a/moabb/datasets/Lee2019.py b/moabb/datasets/Lee2019.py index 040416bae..5b57b60ea 100644 --- a/moabb/datasets/Lee2019.py +++ 
b/moabb/datasets/Lee2019.py @@ -64,7 +64,7 @@ def __init__( subjects=list(range(1, 55)), sessions_per_subject=2, events=events, - code="Lee2019_" + code_suffix, + code="Lee2019-" + code_suffix, interval=interval, paradigm=paradigm, doi="10.5524/100542", diff --git a/moabb/datasets/Weibo2014.py b/moabb/datasets/Weibo2014.py index a0efe7c12..d8b15c08d 100644 --- a/moabb/datasets/Weibo2014.py +++ b/moabb/datasets/Weibo2014.py @@ -120,7 +120,7 @@ def __init__(self): right_hand_left_foot=6, rest=7, ), - code="Weibo 2014", + code="Weibo2014", # Full trial w/ rest is 0-8 interval=[3, 7], paradigm="imagery", diff --git a/moabb/datasets/Zhou2016.py b/moabb/datasets/Zhou2016.py index b4eb1e841..03d0d15de 100644 --- a/moabb/datasets/Zhou2016.py +++ b/moabb/datasets/Zhou2016.py @@ -90,7 +90,7 @@ def __init__(self): subjects=list(range(1, 5)), sessions_per_subject=3, events=dict(left_hand=1, right_hand=2, feet=3), - code="Zhou 2016", + code="Zhou2016", # MI 1-6s, prepare 0-1, break 6-10 # boundary effects interval=[0, 5], diff --git a/moabb/datasets/alex_mi.py b/moabb/datasets/alex_mi.py index 8b874a865..1e35ecadd 100644 --- a/moabb/datasets/alex_mi.py +++ b/moabb/datasets/alex_mi.py @@ -49,7 +49,7 @@ def __init__(self): subjects=list(range(1, 9)), sessions_per_subject=1, events=dict(right_hand=2, feet=3, rest=4), - code="Alexandre Motor Imagery", + code="AlexandreMotorImagery", interval=[0, 3], paradigm="imagery", ) diff --git a/moabb/datasets/base.py b/moabb/datasets/base.py index 9eae59410..e3cc9b93c 100644 --- a/moabb/datasets/base.py +++ b/moabb/datasets/base.py @@ -1,6 +1,7 @@ """Base class for a dataset.""" import abc import logging +import re import traceback from dataclasses import dataclass from enum import Enum @@ -125,6 +126,11 @@ def apply_step(pipeline, obj): raise error +def is_camel_kebab_case(name): + """Check if a string is in CamelCase but can also contain dashes.""" + return re.fullmatch(r"[a-zA-Z0-9\-]+", name) is not None + + class BaseDataset(metaclass=abc.ABCMeta): """Abstract Moabb BaseDataset. @@ -154,7 +160,8 @@ class BaseDataset(metaclass=abc.ABCMeta): - word_ass (for word association) code: string - Unique identifier for dataset, used in all plots + Unique identifier for dataset, used in all plots. + The code should be in CamelCase. interval: list with 2 entries Imagery interval as defined in the dataset description @@ -182,6 +189,12 @@ def __init__( except TypeError: raise ValueError("subjects must be a iterable, like a list") from None + if not is_camel_kebab_case(code): + raise ValueError( + f"code {code!r} must be in Camel-KebabCase; " + "i.e. use CamelCase, and add dashes where absolutely necessary." 
+ ) + self.subject_list = subjects self.n_sessions = sessions_per_subject self.event_id = events diff --git a/moabb/datasets/bids_interface.py b/moabb/datasets/bids_interface.py index f95f393df..011815fe8 100644 --- a/moabb/datasets/bids_interface.py +++ b/moabb/datasets/bids_interface.py @@ -15,6 +15,7 @@ import datetime import json import logging +import re from collections import OrderedDict from dataclasses import dataclass from pathlib import Path @@ -38,6 +39,12 @@ log = logging.getLogger(__name__) +def camel_to_kebab_case(name): + """Converts a CamelCase string to kebab-case.""" + name = re.sub("(.)([A-Z][a-z]+)", r"\1-\2", name) + return re.sub("([a-z0-9])([A-Z])", r"\1-\2", name).lower() + + def subject_moabb_to_bids(subject: int): """Convert the subject number to string (subject).""" return str(subject) @@ -119,11 +126,10 @@ def __repr__(self): @property def root(self): """Return the root path of the BIDS dataset.""" - code = self.dataset.code + "-BIDS" + code = self.dataset.code mne_path = Path(dl.get_dataset_path(code, self.path)) - cache_dir = f"MNE-{code.lower()}-cache" + cache_dir = f"MNE-BIDS-{camel_to_kebab_case(code)}" cache_path = mne_path / cache_dir - return cache_path @property diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py index 9cc2872a7..b3dbd5a48 100644 --- a/moabb/datasets/braininvaders.py +++ b/moabb/datasets/braininvaders.py @@ -34,24 +34,24 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 for file_path in file_path_list: if ds.code in [ - "Brain Invaders 2012", - "Brain Invaders 2014a", - "Brain Invaders 2014b", - "Brain Invaders 2015b", + "BrainInvaders2012", + "BrainInvaders2014a", + "BrainInvaders2014b", + "BrainInvaders2015b", ]: session_name = "session_1" - elif ds.code == "Brain Invaders 2013a": + elif ds.code == "BrainInvaders2013a": session_number = file_path.split(os.sep)[-2].replace("Session", "") session_name = "session_" + session_number - elif ds.code == "Brain Invaders 2015a": + elif ds.code == "BrainInvaders2015a": session_name = f'session_{file_path.split("_")[-1][1:2]}' - elif ds.code == "P300-VR": + elif ds.code == "VR-P300": session_name = file_path.split(".")[0].split("_")[-1] if session_name not in sessions.keys(): sessions[session_name] = {} - if ds.code == "Brain Invaders 2012": + if ds.code == "BrainInvaders2012": condition = file_path.split("/")[-1].split(".")[0].split(os.sep)[-1] run_name = "run_" + condition # fmt: off @@ -66,7 +66,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 stim = (X[18, :] + X[19, :])[None, :] X = np.concatenate([S, stim]) sfreq = 128 - elif ds.code == "Brain Invaders 2013a": + elif ds.code == "BrainInvaders2013a": run_number = file_path.split(os.sep)[-1] run_number = run_number.split("_")[-1] run_number = run_number.split(".mat")[0] @@ -80,7 +80,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 chtypes = ["eeg"] * 16 + ["stim"] X = loadmat(file_path)["data"].T sfreq = 512 - elif ds.code == "Brain Invaders 2014a": + elif ds.code == "BrainInvaders2014a": run_name = "run_1" # fmt: off chnames = [ @@ -95,7 +95,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 stim = D[-1, :] X = np.concatenate([S, stim[None, :]]) sfreq = 512 - elif ds.code == "Brain Invaders 2014b": + elif ds.code == "BrainInvaders2014b": # fmt: off chnames = [ 'Fp1', 'Fp2', 'AFz', 'F7', 'F3', 'F4', 'F8', 'FC5', 'FC1', 'FC2', @@ -114,7 +114,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 stim = D[-1, :] X = np.concatenate([S, stim[None, :]]) sfreq = 512 - elif ds.code == "Brain 
Invaders 2015a": + elif ds.code == "BrainInvaders2015a": run_name = "run_1" # fmt: off chnames = [ @@ -129,7 +129,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 stim = D[-2, :] + D[-1, :] X = np.concatenate([S, stim[None, :]]) sfreq = 512 - elif ds.code == "Brain Invaders 2015b": + elif ds.code == "BrainInvaders2015b": run_name = "run_" + file_path.split("_")[-1].split(".")[0][1] # fmt: off chnames = [ @@ -152,7 +152,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 stim[idx_nontarget] = 1 X = np.concatenate([S, stim[None, :]]) sfreq = 512 - elif ds.code == "P300-VR": + elif ds.code == "VR-P300": data = loadmat(os.path.join(file_path, os.listdir(file_path)[0]))["data"] chnames = [ @@ -189,11 +189,11 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 verbose=False, ) - if not ds.code == "P300-VR": + if not ds.code == "VR-P300": raw = mne.io.RawArray(data=X, info=info, verbose=False) raw.set_montage(make_standard_montage("standard_1020")) - if ds.code == "Brain Invaders 2012": + if ds.code == "BrainInvaders2012": # get rid of the Fz channel (it is the ground) raw.info["bads"] = ["Fz"] raw.pick_types(eeg=True, stim=True) @@ -230,7 +230,7 @@ def _bi_data_path( # noqa: C901 raise (ValueError("Invalid subject number")) subject_paths = [] - if ds.code == "Brain Invaders 2012": + if ds.code == "BrainInvaders2012": # check if has the .zip url = f"{BI2012a_URL}subject_{subject:02}.zip" path_zip = dl.data_dl(url, "BRAININVADERS2012") @@ -253,7 +253,7 @@ def _bi_data_path( # noqa: C901 osp.join(f"{path_folder}subject_{subject:02}", "online.mat") ) - elif ds.code == "Brain Invaders 2013a": + elif ds.code == "BrainInvaders2013a": if subject in [1, 2, 3, 4, 5, 6, 7]: zipname_list = [ f"subject{subject:02}_session{i:02}.zip" for i in range(1, 8 + 1) @@ -305,7 +305,7 @@ def _bi_data_path( # noqa: C901 osp.join(directory, "Session*", filename.replace(".gdf", ".mat")) ) - elif ds.code == "Brain Invaders 2014a": + elif ds.code == "BrainInvaders2014a": url = f"{BI2014a_URL}subject_{subject:02}.zip" path_zip = dl.data_dl(url, "BRAININVADERS2014A") path_folder = path_zip.strip(f"subject_{subject:02}.zip") @@ -320,7 +320,7 @@ def _bi_data_path( # noqa: C901 # filter the data regarding the experimental conditions subject_paths.append(osp.join(path_folder_subject, f"subject_{subject:02}.mat")) - elif ds.code == "Brain Invaders 2014b": + elif ds.code == "BrainInvaders2014b": group = (subject + 1) // 2 url = f"{BI2014b_URL}group_{group:02}_mat.zip" path_zip = dl.data_dl(url, "BRAININVADERS2014B") @@ -346,7 +346,7 @@ def _bi_data_path( # noqa: C901 # Collaborative session are not loaded # subject_paths.append(osp.join(path_folder_subject, f'group_{(subject+1)//2:02}.mat') - elif ds.code == "Brain Invaders 2015a": + elif ds.code == "BrainInvaders2015a": # TODO: possible fusion with 2014a? url = f"{BI2015a_URL}subject_{subject:02}_mat.zip" path_zip = dl.data_dl(url, "BRAININVADERS2015A") @@ -367,13 +367,13 @@ def _bi_data_path( # noqa: C901 path_folder_subject, f"subject_{subject:02}_session_{session:02}.mat" ) ) - elif ds.code == "Brain Invaders 2015b": + elif ds.code == "BrainInvaders2015b": # TODO: possible fusion with 2014b? 
- url = f"{BI2015b_URL}group_{(subject+1)//2:02}_mat.zip" + url = f"{BI2015b_URL}group_{(subject + 1) // 2:02}_mat.zip" path_zip = dl.data_dl(url, "BRAININVADERS2015B") - path_folder = path_zip.strip(f"group_{(subject+1)//2:02}_mat.zip") + path_folder = path_zip.strip(f"group_{(subject + 1) // 2:02}_mat.zip") # check if has to unzip - path_folder_subject = f"{path_folder}group_{(subject+1)//2:02}" + path_folder_subject = f"{path_folder}group_{(subject + 1) // 2:02}" if not (osp.isdir(path_folder_subject)): os.mkdir(path_folder_subject) zip_ref = z.ZipFile(path_zip, "r") @@ -383,12 +383,12 @@ def _bi_data_path( # noqa: C901 subject_paths = [ osp.join( path_folder, - f"group_{(subject+1)//2:02}", - f"group_{(subject+1)//2:02}_s{i}", + f"group_{(subject + 1) // 2:02}", + f"group_{(subject + 1) // 2:02}_s{i}", ) for i in range(1, 5) ] - elif ds.code == "P300-VR": + elif ds.code == "VR-P300": subject_paths = [] if ds.virtual_reality: url = "{:s}subject_{:02d}_{:s}.mat".format(VIRTUALREALITY_URL, subject, "VR") @@ -449,7 +449,7 @@ def __init__(self, Training=True, Online=False): subjects=list(range(1, 26)), sessions_per_subject=1, events=dict(Target=2, NonTarget=1), - code="Brain Invaders 2012", + code="BrainInvaders2012", interval=[0, 1], paradigm="p300", doi="https://doi.org/10.5281/zenodo.2649006", @@ -549,7 +549,7 @@ def __init__(self, NonAdaptive=True, Adaptive=False, Training=True, Online=False subjects=list(range(1, 25)), sessions_per_subject=1, events=dict(Target=33285, NonTarget=33286), - code="Brain Invaders 2013a", + code="BrainInvaders2013a", interval=[0, 1], paradigm="p300", doi="https://doi.org/10.5281/zenodo.2669187", @@ -612,7 +612,7 @@ def __init__(self): subjects=list(range(1, 65)), sessions_per_subject=1, events=dict(Target=2, NonTarget=1), - code="Brain Invaders 2014a", + code="BrainInvaders2014a", interval=[0, 1], paradigm="p300", doi="https://doi.org/10.5281/zenodo.3266222", @@ -671,7 +671,7 @@ def __init__(self): subjects=list(range(1, 38)), sessions_per_subject=1, events=dict(Target=2, NonTarget=1), - code="Brain Invaders 2014b", + code="BrainInvaders2014b", interval=[0, 1], paradigm="p300", doi="https://doi.org/10.5281/zenodo.3267301", @@ -731,7 +731,7 @@ def __init__(self): subjects=list(range(1, 44)), sessions_per_subject=3, events=dict(Target=2, NonTarget=1), - code="Brain Invaders 2015a", + code="BrainInvaders2015a", interval=[0, 1], paradigm="p300", doi="https://doi.org/10.5281/zenodo.3266929", @@ -794,7 +794,7 @@ def __init__(self): subjects=list(range(1, 45)), sessions_per_subject=1, events=dict(Target=2, NonTarget=1), - code="Brain Invaders 2015b", + code="BrainInvaders2015b", interval=[0, 1], paradigm="p300", doi="https://doi.org/10.5281/zenodo.3267307", @@ -863,7 +863,7 @@ def __init__(self, virtual_reality=False, screen_display=True): subjects=list(range(1, 21 + 1)), sessions_per_subject=1, events=dict(Target=2, NonTarget=1), - code="P300-VR", + code="VR-P300", interval=[0, 1.0], paradigm="p300", doi="https://doi.org/10.5281/zenodo.2605204", diff --git a/moabb/datasets/epfl.py b/moabb/datasets/epfl.py index d4eeba8cd..399a23698 100644 --- a/moabb/datasets/epfl.py +++ b/moabb/datasets/epfl.py @@ -75,7 +75,7 @@ def __init__(self): subjects=[1, 2, 3, 4, 6, 7, 8, 9], sessions_per_subject=4, events=dict(Target=2, NonTarget=1), - code="EPFL P300 dataset", + code="EPFL-P300", interval=[0, 1], paradigm="p300", doi="10.1016/j.jneumeth.2007.03.005", diff --git a/moabb/datasets/fake.py b/moabb/datasets/fake.py index dca29e657..8e0fc0ce3 100644 --- 
a/moabb/datasets/fake.py +++ b/moabb/datasets/fake.py @@ -19,7 +19,7 @@ class FakeDataset(BaseDataset): Parameters ---------- event_list: list or tuple of str - List of event to generate, default: ("fake_c1", "fake_c2", "fake_c3") + List of event to generate, default: ("fake1", "fake2", "fake3") n_sessions: int, default 2 Number of session to generate n_runs: int, default 2 @@ -36,7 +36,7 @@ class FakeDataset(BaseDataset): def __init__( self, - event_list=("fake_c1", "fake_c2", "fake_c3"), + event_list=("fake1", "fake2", "fake3"), n_sessions=2, n_runs=2, n_subjects=10, @@ -47,7 +47,10 @@ def __init__( self.n_runs = n_runs event_id = {ev: ii + 1 for ii, ev in enumerate(event_list)} self.channels = channels - code = f"{code}_{paradigm}_{n_subjects}_{n_sessions}_{n_runs}__{'_'.join(event_list)}__{'_'.join(channels)}" + code = ( + f"{code}-{paradigm.capitalize()}-{n_subjects}-{n_sessions}-{n_runs}-" + f"{''.join([e.replace('_', '').capitalize() for e in event_list])}-{''.join(channels)}" + ) super().__init__( subjects=list(range(1, n_subjects + 1)), sessions_per_subject=n_sessions, @@ -56,7 +59,7 @@ def __init__( interval=[0, 3], paradigm=paradigm, ) - key = "MNE_DATASETS_{:s}-BIDS_PATH".format(self.code.upper()) + key = "MNE_DATASETS_{:s}_PATH".format(self.code.upper()) temp_dir = get_config(key) if temp_dir is None or not Path(temp_dir).is_dir(): temp_dir = tempfile.mkdtemp() diff --git a/moabb/datasets/huebner_llp.py b/moabb/datasets/huebner_llp.py index d23d1b327..b96847b4c 100644 --- a/moabb/datasets/huebner_llp.py +++ b/moabb/datasets/huebner_llp.py @@ -172,7 +172,7 @@ def __init__(self, interval=None, raw_slice_offset=None, use_blocks_as_sessions= raw_slice_offset=raw_slice_offset, n_subjects=13, sessions_per_subject=1, # if varying, take minimum - code="Visual Speller LLP", + code="VisualSpellerLLP", interval=interval, doi=llp_speller_paper_doi, use_blocks_as_sessions=use_blocks_as_sessions, @@ -231,7 +231,7 @@ def __init__(self, interval=None, raw_slice_offset=None, use_blocks_as_sessions= raw_slice_offset=raw_slice_offset, n_subjects=12, sessions_per_subject=1, # if varying, take minimum - code="Visual Speller MIX", + code="VisualSpellerMIX", interval=interval, doi=mix_speller_paper_doi, use_blocks_as_sessions=use_blocks_as_sessions, diff --git a/moabb/datasets/mpi_mi.py b/moabb/datasets/mpi_mi.py index bf0de31d4..c1df63ed2 100644 --- a/moabb/datasets/mpi_mi.py +++ b/moabb/datasets/mpi_mi.py @@ -67,7 +67,7 @@ def __init__(self): subjects=list(range(1, 11)), sessions_per_subject=1, events=dict(right_hand=2, left_hand=1), - code="Grosse-Wentrup 2009", + code="GrosseWentrup2009", interval=[0, 7], paradigm="imagery", doi="10.1109/TBME.2008.2009768", diff --git a/moabb/datasets/neiry.py b/moabb/datasets/neiry.py index 43b79462e..0b7afb7f8 100644 --- a/moabb/datasets/neiry.py +++ b/moabb/datasets/neiry.py @@ -110,7 +110,7 @@ def __init__(self): subjects=list(range(60)), sessions_per_subject=1, events={"Target": 1, "NonTarget": 2}, - code="Demons P300", + code="Demons-P300", interval=[0, 1], paradigm="p300", ) diff --git a/moabb/datasets/physionet_mi.py b/moabb/datasets/physionet_mi.py index 9e156cbbc..5628475e8 100644 --- a/moabb/datasets/physionet_mi.py +++ b/moabb/datasets/physionet_mi.py @@ -81,7 +81,7 @@ def __init__(self, imagined=True, executed=False): subjects=list(range(1, 110)), sessions_per_subject=1, events=dict(left_hand=2, right_hand=3, feet=5, hands=4, rest=1), - code="Physionet Motor Imagery", + code="PhysionetMotorImagery", # website does not specify how long the trials 
are, but the # interval between 2 trial is 4 second. interval=[0, 3], diff --git a/moabb/datasets/sosulski2019.py b/moabb/datasets/sosulski2019.py index 291a9ab08..84a97956e 100644 --- a/moabb/datasets/sosulski2019.py +++ b/moabb/datasets/sosulski2019.py @@ -104,7 +104,7 @@ def __init__( self.n_channels = 31 self.use_soas_as_sessions = use_soas_as_sessions self.description_map = {"Stimulus/S 21": "Target", "Stimulus/S 1": "NonTarget"} - code = "Spot Pilot P300 dataset" + code = "SpotPilot-P300" interval = [-0.2, 1] if interval is None else interval super().__init__( subjects=list(range(1, 13 + 1)), diff --git a/moabb/datasets/ssvep_exo.py b/moabb/datasets/ssvep_exo.py index 3584e2331..7deb23c54 100644 --- a/moabb/datasets/ssvep_exo.py +++ b/moabb/datasets/ssvep_exo.py @@ -60,7 +60,7 @@ def __init__(self): subjects=list(range(1, 13)), sessions_per_subject=1, events={"13": 2, "17": 3, "21": 4, "rest": 1}, - code="Exoskeleton_SSVEP", + code="Exoskeleton-SSVEP", interval=[2, 4], paradigm="ssvep", doi="10.1016/j.neucom.2016.01.007", diff --git a/moabb/datasets/ssvep_mamem.py b/moabb/datasets/ssvep_mamem.py index 56af02ee3..c90cc0671 100644 --- a/moabb/datasets/ssvep_mamem.py +++ b/moabb/datasets/ssvep_mamem.py @@ -23,6 +23,8 @@ log = logging.getLogger(__name__) MAMEM_URL = "https://ndownloader.figshare.com/files/" + + # Specific release # MAMEM1_URL = 'https://ndownloader.figshare.com/articles/2068677/versions/6' # MAMEM2_URL = 'https://ndownloader.figshare.com/articles/3153409/versions/4' @@ -107,13 +109,13 @@ def _get_single_subject_data(self, subject): if fnamed[4] == "x": continue session_name = "session_0" - if self.code == "MAMEM3_SSVEP": + if self.code == "MAMEM3-SSVEP": repetition = len(fnamed) - 10 - run_name = f"run_{(ord(fnamed[4])-97)*2 + repetition}" + run_name = f"run_{(ord(fnamed[4]) - 97) * 2 + repetition}" else: - run_name = f"run_{ord(fnamed[4])-97}" + run_name = f"run_{ord(fnamed[4]) - 97}" - if self.code == "MAMEM3_SSVEP": + if self.code == "MAMEM3-SSVEP": m = loadmat(fpath) ch_names = [e[0] for e in m["info"][0, 0][9][0]] sfreq = 128 @@ -124,7 +126,7 @@ def _get_single_subject_data(self, subject): ch_names = [f"E{i + 1}" for i in range(0, 256)] ch_names.append("stim") sfreq = 250 - if self.code == "MAMEM2_SSVEP": + if self.code == "MAMEM2-SSVEP": labels = m["labels"] else: labels = None @@ -149,7 +151,7 @@ def data_path( raise (ValueError("Invalid subject number")) sub = f"{subject:02d}" - sign = self.code.split("_")[0] + sign = self.code.split("-")[0] key_dest = f"MNE-{sign.lower():s}-data" path = osp.join(get_dataset_path(sign, path), key_dest) @@ -279,7 +281,7 @@ def __init__(self): events={"6.66": 1, "7.50": 2, "8.57": 3, "10.00": 4, "12.00": 5}, sessions_per_subject=1, # 5 runs per sessions, except 3 for S001, S003, S008, 4 for S004 - code="MAMEM1_SSVEP", + code="MAMEM1-SSVEP", doi="https://arxiv.org/abs/1602.00904", figshare_id=2068677, ) @@ -372,7 +374,7 @@ def __init__(self): super().__init__( events={"6.66": 1, "7.50": 2, "8.57": 3, "10.00": 4, "12.00": 5}, sessions_per_subject=1, - code="MAMEM2_SSVEP", + code="MAMEM2-SSVEP", doi="https://arxiv.org/abs/1602.00904", figshare_id=3153409, ) @@ -480,7 +482,7 @@ def __init__(self): "12.00": 33025, }, sessions_per_subject=1, - code="MAMEM3_SSVEP", + code="MAMEM3-SSVEP", doi="https://arxiv.org/abs/1602.00904", figshare_id=3413851, ) diff --git a/moabb/datasets/ssvep_nakanishi.py b/moabb/datasets/ssvep_nakanishi.py index 342da58d9..058006e94 100644 --- a/moabb/datasets/ssvep_nakanishi.py +++ 
b/moabb/datasets/ssvep_nakanishi.py @@ -61,7 +61,7 @@ def __init__(self): "12.75": 11, "14.75": 12, }, - code="Nakanishi_SSVEP", + code="Nakanishi-SSVEP", interval=[0.15, 4.3], paradigm="ssvep", doi="doi.org/10.1371/journal.pone.0140703", diff --git a/moabb/datasets/ssvep_wang.py b/moabb/datasets/ssvep_wang.py index 0dd05d210..572874a15 100644 --- a/moabb/datasets/ssvep_wang.py +++ b/moabb/datasets/ssvep_wang.py @@ -16,6 +16,8 @@ # WANG_URL = 'http://bci.med.tsinghua.edu.cn/upload/yijun/' # 403 error WANG_URL = "ftp://sccn.ucsd.edu/pub/ssvep_benchmark_dataset/" + + # WANG_URL = "http://www.thubci.com/uploads/down/" @@ -104,6 +106,7 @@ class Wang2016(BaseDataset): "P8", "PO7", "PO5", "PO3", "POz", "PO4", "PO6", "PO8", "CB1", "O1", "Oz", "O2", "CB2", "stim", ] + # fmt: on def __init__(self): @@ -111,7 +114,7 @@ def __init__(self): subjects=list(range(1, 35)), sessions_per_subject=1, events=self._events, - code="Wang_SSVEP", + code="Wang-SSVEP", interval=[0.5, 5.5], paradigm="ssvep", doi="doi://10.1109/TNSRE.2016.2627556", diff --git a/moabb/tests/benchmark.py b/moabb/tests/benchmark.py index a5ef28b60..7ac549875 100644 --- a/moabb/tests/benchmark.py +++ b/moabb/tests/benchmark.py @@ -21,10 +21,11 @@ def test_benchmark_strdataset(self): pipelines=str(self.pp_dir), evaluations=["WithinSession"], include_datasets=[ - "FakeDataset_imagery_10_2_2__left_hand_right_hand__C3_Cz_C4", - "FakeDataset_p300_10_2_2__Target_NonTarget__C3_Cz_C4", - "FakeDataset_ssvep_10_2_2__13_15__C3_Cz_C4", + "FakeDataset-Imagery-10-2-2-LefthandRighthand-C3CzC4", + "FakeDataset-P300-10-2-2-TargetNontarget-C3CzC4", + "FakeDataset-Ssvep-10-2-2-1315-C3CzC4", ], + overwrite=True, ) self.assertEqual(len(res), 80) @@ -37,6 +38,7 @@ def test_benchmark_objdataset(self): FakeDataset(["Target", "NonTarget"], paradigm="p300"), FakeDataset(["13", "15"], paradigm="ssvep"), ], + overwrite=True, ) self.assertEqual(len(res), 80) @@ -53,6 +55,7 @@ def test_selectparadigm(self): pipelines=str(self.pp_dir), evaluations=["WithinSession"], paradigms=["FakeImageryParadigm"], + overwrite=True, ) self.assertEqual(len(res), 40) diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py index 0be586e9c..ea64feb53 100644 --- a/moabb/tests/datasets.py +++ b/moabb/tests/datasets.py @@ -5,7 +5,7 @@ import mne -from moabb import datasets as db +import moabb.datasets as db from moabb.datasets import Shin2017A, Shin2017B, VirtualReality from moabb.datasets.base import BaseDataset from moabb.datasets.compound_dataset import CompoundDataset @@ -150,6 +150,13 @@ def test_dataset_accept(self): if mne.get_config("MNE_DATASETS_BBCIFNIRS_PATH") is None: self.assertRaises(AttributeError, ds.get_data, [1]) + def test_datasets_init(self): + for ds in dataset_list: + kwargs = {} + if inspect.signature(ds).parameters.get("accept"): + kwargs["accept"] = True + self.assertIsNotNone(ds(**kwargs)) + def test_dataset_list(self): all_datasets = len( [ diff --git a/tutorials/tutorial_4_adding_a_dataset.py b/tutorials/tutorial_4_adding_a_dataset.py index be938006d..0cc329cb6 100644 --- a/tutorials/tutorial_4_adding_a_dataset.py +++ b/tutorials/tutorial_4_adding_a_dataset.py @@ -66,7 +66,6 @@ def create_example_dataset(): mdict["fs"] = fs savemat(filename, mdict) - ############################################################################## # Creating a Dataset Class # ------------------------ @@ -86,6 +85,7 @@ def create_example_dataset(): ExampleDataset_URL = "https://sandbox.zenodo.org/record/369543/files/" + 
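# A sketch of how the Camel-KebabCase FakeDataset codes used in the benchmark
# test above are composed, following the f-string introduced in fake.py in
# this patch (event and channel names are the test's own examples):
paradigm = "imagery"
events = ("left_hand", "right_hand")
channels = ("C3", "Cz", "C4")
code = (
    f"FakeDataset-{paradigm.capitalize()}-10-2-2-"
    f"{''.join(e.replace('_', '').capitalize() for e in events)}-"
    f"{''.join(channels)}"
)
assert code == "FakeDataset-Imagery-10-2-2-LefthandRighthand-C3CzC4"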
############################################################################## # The ``ExampleDataset`` needs to implement only 3 functions: # @@ -107,7 +107,7 @@ def __init__(self): subjects=[1, 2, 3], sessions_per_subject=1, events={"left_hand": 1, "right_hand": 2}, - code="Example dataset", + code="ExampleDataset", interval=[0, 0.75], paradigm="imagery", doi="", From fcc1abfe5b60d9721099fc60c6b414552536f8ef Mon Sep 17 00:00:00 2001 From: Sara Sedlar Date: Thu, 10 Aug 2023 14:26:26 +0200 Subject: [PATCH 33/64] dataset download fix (#433) * Add possibility to have non-http downloaders; keeping certificate check off in http downloaders (not suggested) * [pre-commit.ci] auto fixes from pre-commit.com hooks * Update whats_new.rst --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Bru --- docs/source/whats_new.rst | 1 + moabb/datasets/download.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 1be4bcfe1..3f64aa209 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -61,6 +61,7 @@ Bugs - Fix :func:`moabb.paradigms.base.BaseParadigm` using attributes before defining them (PR :gh:`408`, issue :gh:`425` by `Pierre Guetschel`_) - Fix :func:`moabb.paradigms.FakeImageryParadigm`, :func:`moabb.paradigms.FakeP300Paradigm` and :func:`moabb.paradigms.FakeSSVEPParadigm` ``is_valid`` methods to only accept the correct datasets (PR :gh:`408` by `Pierre Guetschel`_) - Fix ``dataset_list`` construction, which could be empty due to bad import order (PR :gh:`449` by `Thomas Moreau`_). +- Fixing dataset downloader from servers with non-http (PR :gh:`433` by `Sara Sedlar`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/download.py b/moabb/datasets/download.py index f1b166b3f..4684a89fe 100644 --- a/moabb/datasets/download.py +++ b/moabb/datasets/download.py @@ -12,7 +12,8 @@ from mne import get_config, set_config from mne.datasets.utils import _get_path from mne.utils import _url_to_local_path, verbose -from pooch import HTTPDownloader, file_hash, retrieve +from pooch import file_hash, retrieve +from pooch.downloaders import choose_downloader from requests.exceptions import HTTPError @@ -139,7 +140,9 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None): table = {ord(c): "-" for c in ':*?"<>|'} destination = Path(str(path) + destination.split(str(path))[1].translate(table)) - downloader = HTTPDownloader(verify=False) + downloader = choose_downloader(url, progressbar=True) + if type(downloader).__name__ in ["HTTPDownloader", "DOIDownloader"]: + downloader.kwargs.setdefault("verify", False) # Fetch the file if not destination.is_file() or force_update: From 4de985840b14aed9fb23dd2dbbc08fcc21835eea Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Fri, 18 Aug 2023 09:13:57 +0200 Subject: [PATCH 34/64] Normalize dataset names (#455) * Add depreciation decorator * Update test_dataset_list * Ignore aliases in test_dataset_list * Add is_abreviation function * Add tests for is_abreviation and is-camel-kebab_case * Rename is_abbrev * Fix dashes in is_abbrev * Fix error message * Update BNCI datasets names and codes * Update mamem dataset codes * Update epfl dataset code * Update DeamonsP300 dataset code * Test if codes are unique * Update Huebner codes * Relax is_abrev * Update FakeDataset code * Update code and name of Passive Head Mounted Display with Music Listening dataset * 
Update name of GrosseWentrup2009 * Update code of Nakanishi2015 * Update name of SSVEPExo * Update add argument suffix to Shin2017 and rename to BaseShin2017 * Update code of Sosulski2019 * Re-rename HeadMountedDisplay because colision with VR-P300 * Rename VirtualReality to Cattan2019_VR * Add alias for VirtualReality * Update code of Wand2016 * Update name of braininvaders datasets * Add compound_dataset_list with test * Del _init_dataset_list after usage * Change names of bi-illeteracy datasets * Fix hardcoded code test for Cattan2019-VR * Fix benchmark test * Update whats_new.rst * Update BI2014b Fixing the number of subjects in BI2014b. * Update dataset summaries * Fix case when note already present * Fix forgotten change of BNCI2014001 * warn instead of raise when code and class name don't match (compatibility with depreciated names) * Add test for depreciated datasets init * Fix asserNoLogs only introduced in python3.10 * Specity test for depreciated warning * Undo edit to old whats_new.rst lines * Fix note formatting * Update SSVEPExo name and code to Kalunga2016 --------- Co-authored-by: Bru Co-authored-by: Sara Sedlar --- docs/source/dataset_summary.rst | 52 +++--- docs/source/datasets.rst | 48 +++--- docs/source/whats_new.rst | 4 + .../plot_filterbank_csp_vs_csp.py | 4 +- .../plot_grid_search_withinsession.py | 8 +- .../plot_mne_and_scikit_estimators.py | 4 +- .../plot_select_electrodes_resample.py | 4 +- .../plot_statistical_analysis.py | 6 +- examples/example_codecarbon.py | 4 +- .../plot_learning_curve_p300_external.py | 4 +- .../plot_learning_curve_motor_imagery.py | 6 +- .../plot_learning_curve_p300.py | 4 +- examples/load_model.py | 8 +- examples/plot_benchmark_DL.py | 8 +- examples/plot_benchmark_braindecode.py | 10 +- examples/plot_braindecode.py | 8 +- examples/plot_cross_session_motor_imagery.py | 6 +- .../plot_cross_session_multiple_datasets.py | 4 +- examples/plot_cross_subject_ssvep.py | 6 +- examples/plot_explore_paradigm.py | 6 +- examples/plot_phmd_ml_spectrum.py | 8 +- .../plot_vr_pc_p300_different_epoch_size.py | 6 +- examples/plot_within_session_p300.py | 4 +- moabb/datasets/Lee2019.py | 2 +- moabb/datasets/__init__.py | 57 +++++-- moabb/datasets/base.py | 20 ++- moabb/datasets/bbci_eeg_fnirs.py | 29 +++- moabb/datasets/bnci.py | 155 +++++++++--------- moabb/datasets/braininvaders.py | 86 +++++----- moabb/datasets/compound_dataset/__init__.py | 25 ++- .../compound_dataset/bi_illiteracy.py | 61 ++++--- moabb/datasets/compound_dataset/utils.py | 13 ++ moabb/datasets/epfl.py | 2 +- moabb/datasets/fake.py | 13 +- moabb/datasets/huebner_llp.py | 4 +- moabb/datasets/mpi_mi.py | 14 +- moabb/datasets/neiry.py | 2 +- moabb/datasets/phmd_ml.py | 17 +- moabb/datasets/sosulski2019.py | 4 +- moabb/datasets/ssvep_exo.py | 17 +- moabb/datasets/ssvep_mamem.py | 12 +- moabb/datasets/ssvep_nakanishi.py | 2 +- moabb/datasets/ssvep_wang.py | 2 +- moabb/tests/benchmark.py | 6 +- moabb/tests/datasets.py | 135 ++++++++++++--- moabb/tests/download.py | 38 ++--- moabb/tests/util_tests.py | 108 +++++++++++- moabb/utils.py | 61 +++++++ tutorials/plot_Getting_Started.py | 5 +- ...tutorial_1_simple_example_motor_imagery.py | 7 +- .../tutorial_2_using_mulitple_datasets.py | 6 +- ...orial_3_benchmarking_multiple_pipelines.py | 5 +- .../tutorial_5_build_a_custom_dataset.py | 9 +- 53 files changed, 756 insertions(+), 383 deletions(-) create mode 100644 moabb/datasets/compound_dataset/utils.py diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst index 
b1986908e..f8602b256 100644 --- a/docs/source/dataset_summary.rst +++ b/docs/source/dataset_summary.rst @@ -21,14 +21,14 @@ Motor Imagery :class: sortable AlexMI,8,16,3,20,3s,512Hz,1,1,480 - BNCI2014001,9,22,4,144,4s,250Hz,2,6,62208 - BNCI2014002,14,15,2,80,5s,512Hz,1,8,17920 - BNCI2014004,9,3,2,360,4.5s,250Hz,5,1,32400 - BNCI2015001,12,13,2,200,5s,512Hz,3,1,14400 - BNCI2015004,9,30,5,80,7s,256Hz,2,1,7200 + BNCI2014_001,9,22,4,144,4s,250Hz,2,6,62208 + BNCI2014_002,14,15,2,80,5s,512Hz,1,8,17920 + BNCI2014_004,9,3,2,360,4.5s,250Hz,5,1,32400 + BNCI2015_001,12,13,2,200,5s,512Hz,3,1,14400 + BNCI2015_004,9,30,5,80,7s,256Hz,2,1,7200 Cho2017,52,64,2,100,3s,512Hz,1,1,9800 - Lee2019_MI,55,62,2,100,4s,1000Hz,2,1,11000 - MunichMI,10,128,2,150,7s,500Hz,1,1,3000 + Lee2019_MI,54,62,2,100,4s,1000Hz,2,1,11000 + GrosseWentrup2009,10,128,2,150,7s,500Hz,1,1,3000 Schirrmeister2017,14,128,4,120,4s,500Hz,1,2,13440 Ofner2017,15,61,7,60,3s,512Hz,1,10,63000 PhysionetMI,109,64,4,23,3s,160Hz,1,1,69760 @@ -44,19 +44,19 @@ P300/ERP :header: Dataset, #Subj, #Chan, #Trials / class, Trials length, Sampling rate, #Sessions :class: sortable - BNCI2014008, 8, 8, 3500 NT / 700 T, 1s, 256Hz, 1 - BNCI2014009, 10, 16, 1440 NT / 288 T, 0.8s, 256Hz, 3 - BNCI2015003, 10, 8, 1500 NT / 300 T, 0.8s, 256Hz, 1 - bi2012, 25, 16, 6140 NT / 128 T, 1s, 512Hz, 2 - bi2013a, 24, 16, 3200 NT / 640 T, 1s, 512Hz, 8 for subjects 1-7 else 1 - bi2014a, 64, 16, 990 NT / 198 T, 1s, 512Hz, up to 3 - bi2014b, 37, 32, 200 NT / 40 T, 1s, 512Hz, 3 - bi2015a, 43, 32, 4131 NT / 825 T, 1s, 512Hz, 3 - bi2015b, 44, 32, 2160 NT / 480 T, 1s, 512Hz, 2 - VirtualReality, 21, 16, 600 NT / 120 T, 1s, 512Hz, 2 + BNCI2014_008, 8, 8, 3500 NT / 700 T, 1s, 256Hz, 1 + BNCI2014_009, 10, 16, 1440 NT / 288 T, 0.8s, 256Hz, 3 + BNCI2015_003, 10, 8, 1500 NT / 300 T, 0.8s, 256Hz, 1 + BI2012, 25, 16, 640 NT / 128 T, 1s, 128Hz, 2 + BI2013a, 24, 16, 3200 NT / 640 T, 1s, 512Hz, 8 for subjects 1-7 else 1 + BI2014a, 64, 16, 990 NT / 198 T, 1s, 512Hz, up to 3 + BI2014b, 38, 32, 200 NT / 40 T, 1s, 512Hz, 3 + BI2015a, 43, 32, 4131 NT / 825 T, 1s, 512Hz, 3 + BI2015b, 44, 32, 2160 NT / 480 T, 1s, 512Hz, 1 + Cattan2019_VR, 21, 16, 600 NT / 120 T, 1s, 512Hz, 2 Huebner2017, 13, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 Huebner2018, 12, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 - Sosulski2019, 13, 31, 75 NT / 15 T, , 1000Hz, 3 + Sosulski2019, 13, 31, 75 NT / 15 T, 1.2s, 1000Hz, 3 EPFLP300, 8, 32, 2753 NT / 551 T, 1s, 2048Hz, 4 Lee2019_ERP, 54, 62, 6900 NT / 1380 T, 1s, 1000Hz, 2 @@ -70,7 +70,7 @@ SSVEP :class: sortable Lee2019_SSVEP,54,16,4,25,1s,1000Hz,1 - SSVEPExo,12,8,4,16,2s,256Hz,1 + Kalunga2016,12,8,4,16,2s,256Hz,1 MAMEM1,10,256,5,12-15,3s,250Hz,1 MAMEM2,10,256,5,20-30,3s,250Hz,1 MAMEM3,10,14,4,20-30,3s,128Hz,1 @@ -89,7 +89,7 @@ is a resting state experiment. :header: Dataset, #Subj, #Chan, #Classes, #Blocks / class, Trials length, Sampling rate, #Sessions :class: sortable - HeadMountedDisplay,12,16,2,10,60s,512Hz,1 + Cattan2019_PHMD,12,16,2,10,60s,512Hz,1 Compound Datasets @@ -103,12 +103,12 @@ select a sample of subject inside a dataset (e.g. 
subject with high/low performa :header: Dataset, #Subj, #Original datasets :class: sortable - bi2014a_il,17,bi2014a - bi2014b_il,11,bi2014b - bi2015a_il,2,bi2015a - bi2015b_il,25,bi2015b - VirtualReality_il,4,VirtualReality - biIlliteracy,59,bi2014a_il bi2014b_il bi2015a_il bi2015b_il VirtualReality_il + BI2014a_Il,17,BI2014a + BI2014b_Il,11,BI2014b + BI2015a_Il,2,BI2015a + BI2015b_Il,25,BI2015b + Cattan2019_VR_Il,4,Cattan2019_VR + BI_Il,59,BI2014a_Il BI2014b_Il BI2015a_Il BI2015b_Il Cattan2019_VR_Il Submit a new dataset diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index 63dc2455d..fefacc934 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -15,14 +15,14 @@ Motor Imagery Datasets :template: class.rst AlexMI - BNCI2014001 - BNCI2014002 - BNCI2014004 - BNCI2015001 - BNCI2015004 + BNCI2014_001 + BNCI2014_002 + BNCI2014_004 + BNCI2015_001 + BNCI2015_004 Cho2017 Lee2019_MI - MunichMI + GrosseWentrup2009 Ofner2017 PhysionetMI Schirrmeister2017 @@ -40,16 +40,16 @@ ERP Datasets :toctree: generated/ :template: class.rst - bi2012 - bi2013a - bi2014a - bi2014b - bi2015a - bi2015b - VirtualReality - BNCI2014008 - BNCI2014009 - BNCI2015003 + BI2012 + BI2013a + BI2014a + BI2014b + BI2015a + BI2015b + Cattan2019_VR + BNCI2014_008 + BNCI2014_009 + BNCI2015_003 DemonsP300 EPFLP300 Huebner2017 @@ -66,7 +66,7 @@ SSVEP Datasets :toctree: generated/ :template: class.rst - SSVEPExo + Kalunga2016 Nakanishi2015 Wang2016 MAMEM1 @@ -83,7 +83,7 @@ Resting State Datasets :toctree: generated/ :template: class.rst - HeadMountedDisplay + Cattan2019_PHMD ------------ @@ -131,9 +131,9 @@ ERP Datasets :toctree: generated/ :template: class.rst - bi2014a_il - bi2014b_il - bi2015a_il - bi2015b_il - VirtualReality_il - biIlliteracy + BI2014a_Il + BI2014b_Il + BI2015a_Il + BI2015b_Il + Cattan2019_VR_Il + BI_Il diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 3f64aa209..fb2255f39 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -40,6 +40,10 @@ Enhancements - Allow :func:`moabb.datasets.utils.dataset_search` to search across paradigms ``paradigm=None`` (PR :gh:`408` by `Pierre Guetschel`_) - Improving the review processing with more pre-commit bots (:gh:`435` by `Bruno Aristimunha`_) - Update all dataset codes to remove white spaces and underscores (:gh:`448` by `Pierre Guetschel`_) +- Add :func:`moabb.utils.depreciated_alias` decorator (:gh:`455` by `Pierre Guetschel`_) +- Rename many dataset class names to standardize and deprecate old names (:gh:`455` by `Pierre Guetschel`_) +- Change many dataset codes to match the class names (:gh:`455` by `Pierre Guetschel`_) +- Add :obj:`moabb.datasets.compound_dataset.utils.compound_dataset_list` (:gh:`455` by `Pierre Guetschel`_) Bugs ~~~~ diff --git a/examples/advanced_examples/plot_filterbank_csp_vs_csp.py b/examples/advanced_examples/plot_filterbank_csp_vs_csp.py index 0e9a2e5f5..a5708c26d 100644 --- a/examples/advanced_examples/plot_filterbank_csp_vs_csp.py +++ b/examples/advanced_examples/plot_filterbank_csp_vs_csp.py @@ -18,7 +18,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.evaluations import CrossSessionEvaluation from moabb.paradigms import FilterBankLeftRightImagery, LeftRightImagery from moabb.pipelines.utils import FilterBank @@ -61,7 +61,7 @@ # from 8 to 35 Hz. 
# Because this is being auto-generated we only use 2 subjects -dataset = BNCI2014001() +dataset = BNCI2014_001() dataset.subject_list = dataset.subject_list[:2] datasets = [dataset] overwrite = False # set to True if we want to overwrite cached results diff --git a/examples/advanced_examples/plot_grid_search_withinsession.py b/examples/advanced_examples/plot_grid_search_withinsession.py index 85a15dd44..d5c0f5294 100644 --- a/examples/advanced_examples/plot_grid_search_withinsession.py +++ b/examples/advanced_examples/plot_grid_search_withinsession.py @@ -17,7 +17,7 @@ from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import MotorImagery @@ -30,7 +30,7 @@ # Select the Subject subjects = [1] # Load the dataset -dataset = BNCI2014001() +dataset = BNCI2014_001() events = ["right_hand", "left_hand"] @@ -144,7 +144,7 @@ os.path.join( path, "GridSearch_WithinSession", - "001-2014", + "BNCI2014-001", "1", "session_E", "GridSearchEN", @@ -164,7 +164,7 @@ os.path.join( path, "GridSearch_WithinSession", - "001-2014", + "BNCI2014-001", "1", "session_T", "GridSearchEN", diff --git a/examples/advanced_examples/plot_mne_and_scikit_estimators.py b/examples/advanced_examples/plot_mne_and_scikit_estimators.py index 8149eb99a..d6bb98e66 100644 --- a/examples/advanced_examples/plot_mne_and_scikit_estimators.py +++ b/examples/advanced_examples/plot_mne_and_scikit_estimators.py @@ -37,7 +37,7 @@ find_significant_differences, ) from moabb.analysis.plotting import paired_plot, summary_plot -from moabb.datasets import BNCI2014009 +from moabb.datasets import BNCI2014_009 from moabb.evaluations import CrossSessionEvaluation from moabb.paradigms import P300 @@ -52,7 +52,7 @@ # # Load 2 subjects of BNCI 2014-009 dataset, with 3 session each -dataset = BNCI2014009() +dataset = BNCI2014_009() dataset.subject_list = dataset.subject_list[:3] datasets = [dataset] paradigm = P300() diff --git a/examples/advanced_examples/plot_select_electrodes_resample.py b/examples/advanced_examples/plot_select_electrodes_resample.py index a569c9f00..ef408cb49 100644 --- a/examples/advanced_examples/plot_select_electrodes_resample.py +++ b/examples/advanced_examples/plot_select_electrodes_resample.py @@ -20,7 +20,7 @@ from sklearn.pipeline import make_pipeline import moabb.analysis.plotting as moabb_plt -from moabb.datasets import BNCI2014001, Zhou2016 +from moabb.datasets import BNCI2014_001, Zhou2016 from moabb.datasets.utils import find_intersecting_channels from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import LeftRightImagery @@ -33,7 +33,7 @@ # Load 2 subjects of BNCI 2014-004 and Zhou2016 datasets, with 2 sessions each subj = [1, 2] -datasets = [Zhou2016(), BNCI2014001()] +datasets = [Zhou2016(), BNCI2014_001()] for d in datasets: d.subject_list = subj diff --git a/examples/advanced_examples/plot_statistical_analysis.py b/examples/advanced_examples/plot_statistical_analysis.py index 73bbea947..0c4b3c5a8 100644 --- a/examples/advanced_examples/plot_statistical_analysis.py +++ b/examples/advanced_examples/plot_statistical_analysis.py @@ -27,7 +27,7 @@ compute_dataset_statistics, find_significant_differences, ) -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.evaluations import CrossSessionEvaluation from moabb.paradigms import LeftRightImagery @@ -70,7 +70,7 @@ # 
Evaluation # ---------- # -# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014001). +# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014_001). # The evaluation will return a DataFrame containing a single AUC score for # each subject / session of the dataset, and for each pipeline. # @@ -79,7 +79,7 @@ # be overwritten if necessary. paradigm = LeftRightImagery() -dataset = BNCI2014001() +dataset = BNCI2014_001() dataset.subject_list = dataset.subject_list[:4] datasets = [dataset] overwrite = True # set to False if we want to use cached results diff --git a/examples/example_codecarbon.py b/examples/example_codecarbon.py index 68286d71b..6cdef47f9 100644 --- a/examples/example_codecarbon.py +++ b/examples/example_codecarbon.py @@ -19,7 +19,7 @@ ############################################################################### from moabb import benchmark, set_log_level from moabb.analysis.plotting import codecarbon_plot -from moabb.datasets import BNCI2014001, Zhou2016 +from moabb.datasets import BNCI2014_001, Zhou2016 from moabb.paradigms import LeftRightImagery @@ -69,7 +69,7 @@ # folder. dataset = Zhou2016() -dataset2 = BNCI2014001() +dataset2 = BNCI2014_001() dataset.subject_list = dataset.subject_list[:1] dataset2.subject_list = dataset2.subject_list[:1] datasets = [dataset, dataset2] diff --git a/examples/external/plot_learning_curve_p300_external.py b/examples/external/plot_learning_curve_p300_external.py index 574b5df24..c30a76dab 100644 --- a/examples/external/plot_learning_curve_p300_external.py +++ b/examples/external/plot_learning_curve_p300_external.py @@ -33,7 +33,7 @@ from tdlda import Vectorizer as JumpingMeansVectorizer import moabb -from moabb.datasets import BNCI2014009 +from moabb.datasets import BNCI2014_009 from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import P300 @@ -99,7 +99,7 @@ # and dataset size. paradigm = P300(resample=processing_sampling_rate) -dataset = BNCI2014009() +dataset = BNCI2014_009() # Remove the slicing of the subject list to evaluate multiple subjects dataset.subject_list = dataset.subject_list[0:1] datasets = [dataset] diff --git a/examples/learning_curve/plot_learning_curve_motor_imagery.py b/examples/learning_curve/plot_learning_curve_motor_imagery.py index 99db91a6a..7492d9095 100644 --- a/examples/learning_curve/plot_learning_curve_motor_imagery.py +++ b/examples/learning_curve/plot_learning_curve_motor_imagery.py @@ -30,7 +30,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import LeftRightImagery @@ -63,7 +63,7 @@ # Evaluation # ---------- # -# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014001). +# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014_001). # The evaluation will return a DataFrame containing a single AUC score for # each subject / session of the dataset, and for each pipeline. # @@ -72,7 +72,7 @@ # be overwritten if necessary. 
paradigm = LeftRightImagery() -dataset = BNCI2014001() +dataset = BNCI2014_001() dataset.subject_list = dataset.subject_list[:1] datasets = [dataset] overwrite = True # set to True if we want to overwrite cached results diff --git a/examples/learning_curve/plot_learning_curve_p300.py b/examples/learning_curve/plot_learning_curve_p300.py index f17aa5ed7..4f9d159f9 100644 --- a/examples/learning_curve/plot_learning_curve_p300.py +++ b/examples/learning_curve/plot_learning_curve_p300.py @@ -32,7 +32,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import BNCI2014009 +from moabb.datasets import BNCI2014_009 from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import P300 @@ -77,7 +77,7 @@ # and dataset size. paradigm = P300(resample=processing_sampling_rate) -dataset = BNCI2014009() +dataset = BNCI2014_009() # Remove the slicing of the subject list to evaluate multiple subjects dataset.subject_list = dataset.subject_list[1:2] datasets = [dataset] diff --git a/examples/load_model.py b/examples/load_model.py index 836f54483..3af6d322e 100644 --- a/examples/load_model.py +++ b/examples/load_model.py @@ -53,7 +53,7 @@ # We load the single Keras model, if we want we can set in the exact same pipeline. model_Keras = keras.models.load_model( - "./results/Models_WithinSession/001-2014/1/session_E/Keras_DeepConvNet/kerasdeepconvnet_fitted_model_best.h5" + "./results/Models_WithinSession/BNCI2014-001/1/session_E/Keras_DeepConvNet/kerasdeepconvnet_fitted_model_best.h5" ) # Now we need to instantiate a new SciKeras object since we only saved the Keras model Keras_DeepConvNet_Trained = KerasClassifier(model_Keras) @@ -108,9 +108,9 @@ clf.initialize() -f_params = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_model.pkl" -f_optimizer = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_optim.pkl" -f_history = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_history.json" +f_params = "./results/Models_CrossSession/BNCI2014-001/1/braindecode_EEGInception/EEGInception_fitted_best_model.pkl" +f_optimizer = "./results/Models_CrossSession/BNCI2014-001/1/braindecode_EEGInception/EEGInception_fitted_best_optim.pkl" +f_history = "./results/Models_CrossSession/BNCI2014-001/1/braindecode_EEGInception/EEGInception_fitted_best_history.json" clf.load_params(f_params=f_params, f_optimizer=f_optimizer, f_history=f_history) diff --git a/examples/plot_benchmark_DL.py b/examples/plot_benchmark_DL.py index 3df4647a9..76e7b4d5c 100644 --- a/examples/plot_benchmark_DL.py +++ b/examples/plot_benchmark_DL.py @@ -20,7 +20,7 @@ from moabb import benchmark, set_log_level from moabb.analysis.plotting import score_plot -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.utils import setup_seed @@ -40,7 +40,7 @@ print("GPU is", "AVAILABLE" if GPU else "NOT AVAILABLE") ############################################################################### -# In this example, we will use only the dataset ``BNCI2014001``. +# In this example, we will use only the dataset ``BNCI2014_001``. 
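A short, hedged note related to the path updates above: the saved-model and grid-search folders appear to be named after the dataset code, so artifacts written before this patch live under the old codes and are not found under the new ones. A sketch of the rename, assuming the layout shown in load_model.py:

import os

# Folder layout before and after the code change (illustrative paths only).
old_path = os.path.join("results", "Models_WithinSession", "001-2014", "1", "session_E")
new_path = os.path.join("results", "Models_WithinSession", "BNCI2014-001", "1", "session_E")
# Existing caches either need to be moved to the new location or recomputed.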
# # Running the benchmark # --------------------- @@ -64,8 +64,8 @@ # Set up reproducibility of Tensorflow setup_seed(42) -# Restrict this example only on the first two subject of BNCI2014001 -dataset = BNCI2014001() +# Restrict this example only on the first two subject of BNCI2014_001 +dataset = BNCI2014_001() dataset.subject_list = dataset.subject_list[:2] datasets = [dataset] diff --git a/examples/plot_benchmark_braindecode.py b/examples/plot_benchmark_braindecode.py index 2b7d23e56..5e261fe14 100644 --- a/examples/plot_benchmark_braindecode.py +++ b/examples/plot_benchmark_braindecode.py @@ -21,7 +21,7 @@ from moabb import benchmark, set_log_level from moabb.analysis.plotting import score_plot -from moabb.datasets import BNCI2014001, BNCI2014004 +from moabb.datasets import BNCI2014_001, BNCI2014_004 from moabb.utils import setup_seed @@ -39,7 +39,7 @@ print("GPU is", "AVAILABLE" if cuda else "NOT AVAILABLE") ############################################################################### -# In this example, we will use only 2 subjects from the dataset ``BNCI2014001`` and ``BNCI2014004``. +# In this example, we will use only 2 subjects from the dataset ``BNCI2014_001`` and ``BNCI2014_004``. # # Running the benchmark # --------------------- @@ -67,9 +67,9 @@ # Set up reproducibility of Tensorflow setup_seed(42) -# Restrict this example only to the first two subjects of BNCI2014001 -dataset = BNCI2014001() -dataset2 = BNCI2014004() +# Restrict this example only to the first two subjects of BNCI2014_001 +dataset = BNCI2014_001() +dataset2 = BNCI2014_004() dataset.subject_list = dataset.subject_list[:2] dataset2.subject_list = dataset2.subject_list[:2] datasets = [dataset, dataset2] diff --git a/examples/plot_braindecode.py b/examples/plot_braindecode.py index 4f3b0b75a..06ff06939 100644 --- a/examples/plot_braindecode.py +++ b/examples/plot_braindecode.py @@ -20,7 +20,7 @@ from skorch.callbacks import EarlyStopping, EpochScoring from skorch.dataset import ValidSplit -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.evaluations import CrossSessionEvaluation from moabb.paradigms import MotorImagery from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG @@ -38,12 +38,12 @@ print("GPU is", "AVAILABLE" if cuda else "NOT AVAILABLE") ############################################################################### -# In this example, we will use only the dataset ``BNCI2014001``. +# In this example, we will use only the dataset ``BNCI2014_001``. # # Running the benchmark # --------------------- # -# This example uses the CrossSession evaluation procedure. We focus on the dataset BNCI2014001 and only on 1 subject +# This example uses the CrossSession evaluation procedure. We focus on the dataset BNCI2014_001 and only on 1 subject # to reduce computational time. # # To keep the computational time low, the epoch is reduced. 
In a real situation, we suggest using the following: @@ -74,7 +74,7 @@ tmax = None # Load the dataset -dataset = BNCI2014001() +dataset = BNCI2014_001() events = ["right_hand", "left_hand"] paradigm = MotorImagery( events=events, n_classes=len(events), fmin=fmin, fmax=fmax, tmin=tmin, tmax=tmax diff --git a/examples/plot_cross_session_motor_imagery.py b/examples/plot_cross_session_motor_imagery.py index ede6a94ed..61c0e3052 100644 --- a/examples/plot_cross_session_motor_imagery.py +++ b/examples/plot_cross_session_motor_imagery.py @@ -34,7 +34,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.evaluations import CrossSessionEvaluation from moabb.paradigms import LeftRightImagery @@ -65,7 +65,7 @@ # Evaluation # ---------- # -# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014001). +# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014_001). # The evaluation will return a DataFrame containing a single AUC score for # each subject / session of the dataset, and for each pipeline. # @@ -75,7 +75,7 @@ paradigm = LeftRightImagery() # Because this is being auto-generated we only use 2 subjects -dataset = BNCI2014001() +dataset = BNCI2014_001() dataset.subject_list = dataset.subject_list[:2] datasets = [dataset] overwrite = False # set to True if we want to overwrite cached results diff --git a/examples/plot_cross_session_multiple_datasets.py b/examples/plot_cross_session_multiple_datasets.py index a21e1b82f..c6565582f 100644 --- a/examples/plot_cross_session_multiple_datasets.py +++ b/examples/plot_cross_session_multiple_datasets.py @@ -24,7 +24,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import BNCI2014001, Zhou2016 +from moabb.datasets import BNCI2014_001, Zhou2016 from moabb.evaluations import CrossSessionEvaluation from moabb.paradigms import LeftRightImagery @@ -40,7 +40,7 @@ # Load 2 subjects of BNCI 2014-004 and Zhou2016 datasets, with 2 session each subj = [1, 2] -datasets = [Zhou2016(), BNCI2014001()] +datasets = [Zhou2016(), BNCI2014_001()] for d in datasets: d.subject_list = subj diff --git a/examples/plot_cross_subject_ssvep.py b/examples/plot_cross_subject_ssvep.py index 55e414a19..e6319b509 100644 --- a/examples/plot_cross_subject_ssvep.py +++ b/examples/plot_cross_subject_ssvep.py @@ -27,7 +27,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import SSVEPExo +from moabb.datasets import Kalunga2016 from moabb.evaluations import CrossSubjectEvaluation from moabb.paradigms import SSVEP, FilterBankSSVEP from moabb.pipelines import SSVEP_CCA, SSVEP_TRCA, ExtendedSSVEPSignal, SSVEP_MsetCCA @@ -49,7 +49,7 @@ # frequency. n_subject = 2 -dataset = SSVEPExo() +dataset = Kalunga2016() dataset.subject_list = dataset.subject_list[:n_subject] interval = dataset.interval @@ -58,7 +58,7 @@ # --------------- # # We define the paradigms (SSVEP, SSVEP TRCA, SSVEP MsetCCA, and FilterBankSSVEP) and -# use the dataset SSVEPExo. All 3 SSVEP paradigms applied a bandpass filter (10-42 Hz) on +# use the dataset Kalunga2016. All 3 SSVEP paradigms applied a bandpass filter (10-42 Hz) on # the data, which include all stimuli frequencies and their first harmonics, # while the FilterBankSSVEP paradigm uses as many bandpass filters as # there are stimulation frequencies (here 3). 
For each stimulation frequency diff --git a/examples/plot_explore_paradigm.py b/examples/plot_explore_paradigm.py index 1cab0593a..ce24066f3 100644 --- a/examples/plot_explore_paradigm.py +++ b/examples/plot_explore_paradigm.py @@ -26,7 +26,7 @@ import numpy as np -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.paradigms import FilterBankMotorImagery, LeftRightImagery, MotorImagery @@ -50,14 +50,14 @@ print(paradigm.get_data.__doc__) ############################################################################### -# Lets take the example of the BNCI2014001 dataset, known as the dataset IIa +# Lets take the example of the BNCI2014_001 dataset, known as the dataset IIa # from the BCI competition IV. We will load the data from the subject 1. # When calling `get_data`, the paradigm will retrieve the data from the # specified list of subjects, apply preprocessing (by default, a bandpass # between 7 and 35 Hz), epoch the data (with interval specified by the dataset, # unless superseded by the paradigm) and return the corresponding objects. -dataset = BNCI2014001() +dataset = BNCI2014_001() subjects = [1] X, y, metadata = paradigm.get_data(dataset=dataset, subjects=subjects) diff --git a/examples/plot_phmd_ml_spectrum.py b/examples/plot_phmd_ml_spectrum.py index 4c42a9df4..746138702 100644 --- a/examples/plot_phmd_ml_spectrum.py +++ b/examples/plot_phmd_ml_spectrum.py @@ -5,7 +5,7 @@ This example demonstrates how to perform spectral analysis on epochs extracted from a specific subject -within the :class:`moabb.datasets.HeadMountedDisplay` dataset. +within the :class:`moabb.datasets.Cattan2019_PHMD` dataset. """ @@ -18,7 +18,7 @@ import matplotlib.pyplot as plt import numpy as np -from moabb.datasets import HeadMountedDisplay +from moabb.datasets import Cattan2019_PHMD from moabb.paradigms import RestingStateToP300Adapter @@ -29,7 +29,7 @@ # --------------- # # 1) Specify the channel and subject to compute the power spectrum. -# 2) Create an instance of the :class:`moabb.datasets.HeadMountedDisplay` dataset. +# 2) Create an instance of the :class:`moabb.datasets.Cattan2019_PHMD` dataset. # 3) Create an instance of the :class:`moabb.paradigms.RestingStateToP300Adapter` paradigm. # By default, the data is filtered between 1-35 Hz, # and epochs are extracted from 10 to 50 seconds after event tagging. @@ -38,7 +38,7 @@ channel = "Cz" subject = 1 -dataset = HeadMountedDisplay() +dataset = Cattan2019_PHMD() events = ["on", "off"] paradigm = RestingStateToP300Adapter(events=events, channels=[channel]) diff --git a/examples/plot_vr_pc_p300_different_epoch_size.py b/examples/plot_vr_pc_p300_different_epoch_size.py index f70701420..4c5fb6a65 100644 --- a/examples/plot_vr_pc_p300_different_epoch_size.py +++ b/examples/plot_vr_pc_p300_different_epoch_size.py @@ -27,7 +27,7 @@ from sklearn.preprocessing import LabelEncoder from tqdm import tqdm -from moabb.datasets import VirtualReality +from moabb.datasets import Cattan2019_VR from moabb.paradigms import P300 @@ -46,7 +46,7 @@ # 3) Encode categorical variable (Target/NonTarget) to numerical values. # We will be using label encoding. -dataset = VirtualReality() +dataset = Cattan2019_VR() paradigm = P300() le = LabelEncoder().fit(["Target", "NonTarget"]) @@ -61,7 +61,7 @@ # tmax, subjects and experimental conditions (VR or PC). # # Not all the data will be used for this validation. -# The VirtualReality dataset contains the data from a randomized experiment. 
+# The Cattan2019_VR dataset contains the data from a randomized experiment. # We will only be using the two first repetitions of the 12 experimental blocks. # Data will be selected thanks to the `get_block_repetition` method. diff --git a/examples/plot_within_session_p300.py b/examples/plot_within_session_p300.py index 59c9face8..65ceed6b4 100644 --- a/examples/plot_within_session_p300.py +++ b/examples/plot_within_session_p300.py @@ -29,7 +29,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import BNCI2014009 +from moabb.datasets import BNCI2014_009 from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import P300 @@ -81,7 +81,7 @@ # be overwritten if necessary. paradigm = P300(resample=128) -dataset = BNCI2014009() +dataset = BNCI2014_009() dataset.subject_list = dataset.subject_list[:2] datasets = [dataset] overwrite = True # set to True if we want to overwrite cached results diff --git a/moabb/datasets/Lee2019.py b/moabb/datasets/Lee2019.py index 5b57b60ea..741d8af6b 100644 --- a/moabb/datasets/Lee2019.py +++ b/moabb/datasets/Lee2019.py @@ -233,7 +233,7 @@ class Lee2019_MI(Lee2019): ========== ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions ========== ======= ======= ========== ================= ============ =============== =========== - Lee2019_MI 55 62 2 100 4s 1000Hz 2 + Lee2019_MI 54 62 2 100 4s 1000Hz 2 ========== ======= ======= ========== ================= ============ =============== =========== Dataset from Lee et al 2019 [1]_. diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index 6f42e3e16..ff35d8284 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -12,36 +12,36 @@ from .alex_mi import AlexMI from .bbci_eeg_fnirs import Shin2017A, Shin2017B from .bnci import ( - BNCI2014001, - BNCI2014002, - BNCI2014004, - BNCI2014008, - BNCI2014009, - BNCI2015001, - BNCI2015003, - BNCI2015004, + BNCI2014_001, + BNCI2014_002, + BNCI2014_004, + BNCI2014_008, + BNCI2014_009, + BNCI2015_001, + BNCI2015_003, + BNCI2015_004, ) from .braininvaders import ( - VirtualReality, - bi2012, - bi2013a, - bi2014a, - bi2014b, - bi2015a, - bi2015b, + BI2012, + BI2013a, + BI2014a, + BI2014b, + BI2015a, + BI2015b, + Cattan2019_VR, ) from .epfl import EPFLP300 from .fake import FakeDataset, FakeVirtualRealityDataset from .gigadb import Cho2017 from .huebner_llp import Huebner2017, Huebner2018 from .Lee2019 import Lee2019_ERP, Lee2019_MI, Lee2019_SSVEP -from .mpi_mi import MunichMI +from .mpi_mi import GrosseWentrup2009 from .neiry import DemonsP300 -from .phmd_ml import HeadMountedDisplay +from .phmd_ml import Cattan2019_PHMD from .physionet_mi import PhysionetMI from .schirrmeister2017 import Schirrmeister2017 from .sosulski2019 import Sosulski2019 -from .ssvep_exo import SSVEPExo +from .ssvep_exo import Kalunga2016 from .ssvep_mamem import MAMEM1, MAMEM2, MAMEM3 from .ssvep_nakanishi import Nakanishi2015 from .ssvep_wang import Wang2016 @@ -54,3 +54,24 @@ # Call this last in order to make sure the dataset list contains all # the datasets imported in this file. 
_init_dataset_list() +del _init_dataset_list + +# Depreciated datasets (not added to dataset_list): +from .bnci import BNCI2014001 # noqa: F401 +from .bnci import BNCI2014002 # noqa: F401 +from .bnci import BNCI2014004 # noqa: F401 +from .bnci import BNCI2014008 # noqa: F401 +from .bnci import BNCI2014009 # noqa: F401 +from .bnci import BNCI2015001 # noqa: F401 +from .bnci import BNCI2015003 # noqa: F401 +from .bnci import BNCI2015004 # noqa: F401 +from .braininvaders import VirtualReality # noqa: F401 +from .braininvaders import bi2012 # noqa: F401 +from .braininvaders import bi2013a # noqa: F401 +from .braininvaders import bi2014a # noqa: F401 +from .braininvaders import bi2014b # noqa: F401 +from .braininvaders import bi2015a # noqa: F401 +from .braininvaders import bi2015b # noqa: F401 +from .mpi_mi import MunichMI # noqa: F401 +from .phmd_ml import HeadMountedDisplay # noqa: F401 +from .ssvep_exo import SSVEPExo # noqa: F401 diff --git a/moabb/datasets/base.py b/moabb/datasets/base.py index e3cc9b93c..e9dd7f654 100644 --- a/moabb/datasets/base.py +++ b/moabb/datasets/base.py @@ -126,11 +126,19 @@ def apply_step(pipeline, obj): raise error -def is_camel_kebab_case(name): +def is_camel_kebab_case(name: str): """Check if a string is in CamelCase but can also contain dashes.""" return re.fullmatch(r"[a-zA-Z0-9\-]+", name) is not None +def is_abbrev(abbrev_name: str, full_name: str): + """Check if abbrev_name is an abbreviation of full_name, + i.e. if the characters in abbrev_name are all in full_name + and in the same order. They must share the same capital letters.""" + pattern = re.sub(r"([A-Za-z])", r"\1[a-z0-9\-]*", re.escape(abbrev_name)) + return re.fullmatch(pattern, full_name) is not None + + class BaseDataset(metaclass=abc.ABCMeta): """Abstract Moabb BaseDataset. @@ -192,7 +200,15 @@ def __init__( if not is_camel_kebab_case(code): raise ValueError( f"code {code!r} must be in Camel-KebabCase; " - "i.e. use CamelCase, and add dashes where absolutely necessary." + "i.e. use CamelCase, and add dashes where absolutely necessary. " + "See moabb.datasets.base.is_camel_kebab_case for more information." + ) + class_name = self.__class__.__name__.replace("_", "-") + if not is_abbrev(class_name, code): + log.warning( + f"The dataset class name {class_name!r} must be an abbreviation " + f"of its code {code!r}. " + "See moabb.datasets.base.is_abbrev for more information."
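# Illustrative sketch (not part of this diff): expected behaviour of the naming
# helpers above, using identifiers that appear elsewhere in this patch series.
from moabb.datasets.base import is_abbrev, is_camel_kebab_case

print(is_camel_kebab_case("BNCI2014-001"))         # True: CamelCase plus dashes only
print(is_camel_kebab_case("Example dataset"))      # False: spaces rejected, hence the ExampleDataset fix
print(is_abbrev("BI2014b", "BrainInvaders2014b"))  # True: the class name abbreviates its code
print(is_abbrev("BNCI2014001", "BNCI2014-001"))    # False: deprecated alias names only trigger the warning above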
) self.subject_list = subjects diff --git a/moabb/datasets/bbci_eeg_fnirs.py b/moabb/datasets/bbci_eeg_fnirs.py index 797087435..4e4e01416 100644 --- a/moabb/datasets/bbci_eeg_fnirs.py +++ b/moabb/datasets/bbci_eeg_fnirs.py @@ -74,11 +74,16 @@ def fnirs_data_path(path, subject, accept): return [op.join(datapath, fn) for fn in ["cnt.mat", "mrk.mat"]] -class Shin2017(BaseDataset): +class BaseShin2017(BaseDataset): """Not to be used.""" def __init__( - self, fnirs=False, motor_imagery=True, mental_arithmetic=False, accept=False + self, + suffix, + fnirs=False, + motor_imagery=True, + mental_arithmetic=False, + accept=False, ): if not any([motor_imagery, mental_arithmetic]): raise ( @@ -107,7 +112,7 @@ def __init__( subjects=list(range(1, 30)), sessions_per_subject=n_sessions, events=events, - code="Shin2017", + code="Shin2017" + suffix, # marker is for *task* start not cue start interval=[0, 10], paradigm=("/").join(paradigms), @@ -180,7 +185,7 @@ def data_path( return eeg_data_path(op.join(path, "MNE-eegfnirs-data"), subject, self.accept) -class Shin2017A(Shin2017): +class Shin2017A(BaseShin2017): """Motor Imagey Dataset from Shin et al 2017. .. admonition:: Dataset summary @@ -295,12 +300,15 @@ class Shin2017A(Shin2017): def __init__(self, accept=False): super().__init__( - fnirs=False, motor_imagery=True, mental_arithmetic=False, accept=accept + suffix="A", + fnirs=False, + motor_imagery=True, + mental_arithmetic=False, + accept=accept, ) - self.code = "Shin2017A" -class Shin2017B(Shin2017): +class Shin2017B(BaseShin2017): """Mental Arithmetic Dataset from Shin et al 2017. .. admonition:: Dataset summary @@ -408,6 +416,9 @@ class Shin2017B(Shin2017): def __init__(self, accept=False): super().__init__( - fnirs=False, motor_imagery=False, mental_arithmetic=True, accept=accept + suffix="B", + fnirs=False, + motor_imagery=False, + mental_arithmetic=True, + accept=accept, ) - self.code = "Shin2017B" diff --git a/moabb/datasets/bnci.py b/moabb/datasets/bnci.py index 539213bd6..b5fd1801d 100644 --- a/moabb/datasets/bnci.py +++ b/moabb/datasets/bnci.py @@ -9,6 +9,7 @@ from moabb.datasets import download as dl from moabb.datasets.base import BaseDataset +from moabb.utils import depreciated_alias BNCI_URL = "http://bnci-horizon-2020.eu/database/data-sets/" @@ -22,7 +23,7 @@ def data_path(url, path=None, force_update=False, update_path=None, verbose=None @verbose def load_data( subject, - dataset="001-2014", + dataset="BNCI2014-001", path=None, force_update=False, update_path=None, @@ -69,33 +70,33 @@ def load_data( dictionary containing events and their code. 
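# Illustrative sketch (not part of this diff): the keys of dataset_list below now use
# the full dataset codes, so explicit calls to this loader pass the new identifier.
from moabb.datasets.bnci import load_data

data = load_data(subject=1, dataset="BNCI2014-001")  # subject number is only an example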
""" dataset_list = { - "001-2014": _load_data_001_2014, - "002-2014": _load_data_002_2014, - "004-2014": _load_data_004_2014, - "008-2014": _load_data_008_2014, - "009-2014": _load_data_009_2014, - "001-2015": _load_data_001_2015, - "003-2015": _load_data_003_2015, - "004-2015": _load_data_004_2015, - "009-2015": _load_data_009_2015, - "010-2015": _load_data_010_2015, - "012-2015": _load_data_012_2015, - "013-2015": _load_data_013_2015, + "BNCI2014-001": _load_data_001_2014, + "BNCI2014-002": _load_data_002_2014, + "BNCI2014-004": _load_data_004_2014, + "BNCI2014-008": _load_data_008_2014, + "BNCI2014-009": _load_data_009_2014, + "BNCI2015-001": _load_data_001_2015, + "BNCI2015-003": _load_data_003_2015, + "BNCI2015-004": _load_data_004_2015, + "BNCI2015-009": _load_data_009_2015, + "BNCI2015-010": _load_data_010_2015, + "BNCI2015-012": _load_data_012_2015, + "BNCI2015-013": _load_data_013_2015, } baseurl_list = { - "001-2014": BNCI_URL, - "002-2014": BNCI_URL, - "001-2015": BNCI_URL, - "004-2014": BNCI_URL, - "008-2014": BNCI_URL, - "009-2014": BNCI_URL, - "003-2015": BNCI_URL, - "004-2015": BNCI_URL, - "009-2015": BBCI_URL, - "010-2015": BBCI_URL, - "012-2015": BBCI_URL, - "013-2015": BNCI_URL, + "BNCI2014-001": BNCI_URL, + "BNCI2014-002": BNCI_URL, + "BNCI2015-001": BNCI_URL, + "BNCI2014-004": BNCI_URL, + "BNCI2014-008": BNCI_URL, + "BNCI2014-009": BNCI_URL, + "BNCI2015-003": BNCI_URL, + "BNCI2015-004": BNCI_URL, + "BNCI2015-009": BBCI_URL, + "BNCI2015-010": BBCI_URL, + "BNCI2015-012": BBCI_URL, + "BNCI2015-013": BNCI_URL, } if dataset not in dataset_list.keys(): @@ -747,17 +748,18 @@ def data_path( ) -class BNCI2014001(MNEBNCI): +@depreciated_alias("BNCI2014001", "0.7") +class BNCI2014_001(MNEBNCI): """BNCI 2014-001 Motor Imagery dataset. .. admonition:: Dataset summary - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions - =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014001 9 22 4 144 4s 250Hz 2 - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== + BNCI2014_001 9 22 4 144 4s 250Hz 2 + ============ ======= ======= ========== ================= ============ =============== =========== Dataset IIa from BCI Competition 4 [1]_. @@ -803,24 +805,25 @@ def __init__(self): subjects=list(range(1, 10)), sessions_per_subject=2, events={"left_hand": 1, "right_hand": 2, "feet": 3, "tongue": 4}, - code="001-2014", + code="BNCI2014-001", interval=[2, 6], paradigm="imagery", doi="10.3389/fnins.2012.00055", ) -class BNCI2014002(MNEBNCI): +@depreciated_alias("BNCI2014002", "0.7") +class BNCI2014_002(MNEBNCI): """BNCI 2014-002 Motor Imagery dataset. .. 
admonition:: Dataset summary - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions - =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014002 14 15 2 80 5s 512Hz 1 - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== + BNCI2014_002 14 15 2 80 5s 512Hz 1 + ============ ======= ======= ========== ================= ============ =============== =========== Motor Imagery Dataset from [1]_. @@ -864,24 +867,25 @@ def __init__(self): subjects=list(range(1, 15)), sessions_per_subject=1, events={"right_hand": 1, "feet": 2}, - code="002-2014", + code="BNCI2014-002", interval=[3, 8], paradigm="imagery", doi="10.1515/bmt-2014-0117", ) -class BNCI2014004(MNEBNCI): +@depreciated_alias("BNCI2014004", "0.7") +class BNCI2014_004(MNEBNCI): """BNCI 2014-004 Motor Imagery dataset. .. admonition:: Dataset summary - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions - =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2014004 9 3 2 360 4.5s 250Hz 5 - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== + BNCI2014_004 9 3 2 360 4.5s 250Hz 5 + ============ ======= ======= ========== ================= ============ =============== =========== Dataset B from BCI Competition 2008. @@ -946,24 +950,25 @@ def __init__(self): subjects=list(range(1, 10)), sessions_per_subject=5, events={"left_hand": 1, "right_hand": 2}, - code="004-2014", + code="BNCI2014-004", interval=[3, 7.5], paradigm="imagery", doi="10.1109/TNSRE.2007.906956", ) -class BNCI2014008(MNEBNCI): +@depreciated_alias("BNCI2014008", "0.7") +class BNCI2014_008(MNEBNCI): """BNCI 2014-008 P300 dataset. .. admonition:: Dataset summary - =========== ======= ======= ================= =============== =============== =========== + ============ ======= ======= ================= =============== =============== =========== Name #Subj #Chan #Trials / class Trials length Sampling rate #Sessions - =========== ======= ======= ================= =============== =============== =========== - BNCI2014008 8 8 3500 NT / 700 T 1s 256Hz 1 - =========== ======= ======= ================= =============== =============== =========== + ============ ======= ======= ================= =============== =============== =========== + BNCI2014_008 8 8 3500 NT / 700 T 1s 256Hz 1 + ============ ======= ======= ================= =============== =============== =========== Dataset from [1]_. @@ -1016,24 +1021,25 @@ def __init__(self): subjects=list(range(1, 9)), sessions_per_subject=1, events={"Target": 2, "NonTarget": 1}, - code="008-2014", + code="BNCI2014-008", interval=[0, 1.0], paradigm="p300", doi="10.3389/fnhum.2013.00732", ) -class BNCI2014009(MNEBNCI): +@depreciated_alias("BNCI2014009", "0.7") +class BNCI2014_009(MNEBNCI): """BNCI 2014-009 P300 dataset. 
.. admonition:: Dataset summary - =========== ======= ======= ================= =============== =============== =========== + ============ ======= ======= ================= =============== =============== =========== Name #Subj #Chan #Trials / class Trials length Sampling rate #Sessions - =========== ======= ======= ================= =============== =============== =========== - BNCI2014009 10 16 1440 NT / 288 T 0.8s 256Hz 3 - =========== ======= ======= ================= =============== =============== =========== + ============ ======= ======= ================= =============== =============== =========== + BNCI2014_009 10 16 1440 NT / 288 T 0.8s 256Hz 3 + ============ ======= ======= ================= =============== =============== =========== Dataset from [1]_. @@ -1077,24 +1083,25 @@ def __init__(self): subjects=list(range(1, 11)), sessions_per_subject=3, events={"Target": 2, "NonTarget": 1}, - code="009-2014", + code="BNCI2014-009", interval=[0, 0.8], paradigm="p300", doi="10.1088/1741-2560/11/3/035008", ) -class BNCI2015001(MNEBNCI): +@depreciated_alias("BNCI2015001", "0.7") +class BNCI2015_001(MNEBNCI): """BNCI 2015-001 Motor Imagery dataset. .. admonition:: Dataset summary - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions - =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2015001 12 13 2 200 5s 512Hz 2 - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== + BNCI2015_001 12 13 2 200 5s 512Hz 2 + ============ ======= ======= ========== ================= ============ =============== =========== Dataset from [1]_. @@ -1132,24 +1139,25 @@ def __init__(self): subjects=list(range(1, 13)), sessions_per_subject=2, events={"right_hand": 1, "feet": 2}, - code="001-2015", + code="BNCI2015-001", interval=[0, 5], paradigm="imagery", doi="10.1109/tnsre.2012.2189584", ) -class BNCI2015003(MNEBNCI): +@depreciated_alias("BNCI2015003", "0.7") +class BNCI2015_003(MNEBNCI): """BNCI 2015-003 P300 dataset. .. admonition:: Dataset summary - =========== ======= ======= ================= =============== =============== =========== + ============ ======= ======= ================= =============== =============== =========== Name #Subj #Chan #Trials / class Trials length Sampling rate #Sessions - =========== ======= ======= ================= =============== =============== =========== - BNCI2015003 10 8 1500 NT / 300 T 0.8s 256Hz 1 - =========== ======= ======= ================= =============== =============== =========== + ============ ======= ======= ================= =============== =============== =========== + BNCI2015_003 10 8 1500 NT / 300 T 0.8s 256Hz 1 + ============ ======= ======= ================= =============== =============== =========== Dataset from [1]_. @@ -1174,24 +1182,25 @@ def __init__(self): subjects=list(range(1, 11)), sessions_per_subject=1, events={"Target": 2, "NonTarget": 1}, - code="003-2015", + code="BNCI2015-003", interval=[0, 0.8], paradigm="p300", doi="10.1016/j.neulet.2009.06.045", ) -class BNCI2015004(MNEBNCI): +@depreciated_alias("BNCI2015004", "0.7") +class BNCI2015_004(MNEBNCI): """BNCI 2015-004 Motor Imagery dataset. .. 
admonition:: Dataset summary - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions - =========== ======= ======= ========== ================= ============ =============== =========== - BNCI2015004 9 30 5 80 7s 256Hz 2 - =========== ======= ======= ========== ================= ============ =============== =========== + ============ ======= ======= ========== ================= ============ =============== =========== + BNCI2015_004 9 30 5 80 7s 256Hz 2 + ============ ======= ======= ========== ================= ============ =============== =========== Dataset from [1]_. @@ -1247,7 +1256,7 @@ def __init__(self): subjects=list(range(1, 10)), sessions_per_subject=2, events=dict(right_hand=4, feet=5, navigation=3, subtraction=2, word_ass=1), - code="004-2015", + code="BNCI2015-004", interval=[3, 10], paradigm="imagery", doi="10.1371/journal.pone.0123727", diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py index b3dbd5a48..65c8b66d2 100644 --- a/moabb/datasets/braininvaders.py +++ b/moabb/datasets/braininvaders.py @@ -16,6 +16,7 @@ from moabb.datasets import download as dl from moabb.datasets.base import BaseDataset from moabb.datasets.utils import block_rep +from moabb.utils import depreciated_alias BI2012a_URL = "https://zenodo.org/record/2649069/files/" @@ -45,7 +46,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 session_name = "session_" + session_number elif ds.code == "BrainInvaders2015a": session_name = f'session_{file_path.split("_")[-1][1:2]}' - elif ds.code == "VR-P300": + elif ds.code == "Cattan2019-VR": session_name = file_path.split(".")[0].split("_")[-1] if session_name not in sessions.keys(): @@ -152,7 +153,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 stim[idx_nontarget] = 1 X = np.concatenate([S, stim[None, :]]) sfreq = 512 - elif ds.code == "VR-P300": + elif ds.code == "Cattan2019-VR": data = loadmat(os.path.join(file_path, os.listdir(file_path)[0]))["data"] chnames = [ @@ -189,7 +190,7 @@ def _bi_get_subject_data(ds, subject): # noqa: C901 verbose=False, ) - if not ds.code == "VR-P300": + if not ds.code == "Cattan2019-VR": raw = mne.io.RawArray(data=X, info=info, verbose=False) raw.set_montage(make_standard_montage("standard_1020")) @@ -388,7 +389,7 @@ def _bi_data_path( # noqa: C901 ) for i in range(1, 5) ] - elif ds.code == "VR-P300": + elif ds.code == "Cattan2019-VR": subject_paths = [] if ds.virtual_reality: url = "{:s}subject_{:02d}_{:s}.mat".format(VIRTUALREALITY_URL, subject, "VR") @@ -402,14 +403,15 @@ def _bi_data_path( # noqa: C901 return subject_paths -class bi2012(BaseDataset): - """P300 dataset bi2012 from a "Brain Invaders" experiment. +@depreciated_alias("bi2012", "0.7") +class BI2012(BaseDataset): + """P300 dataset BI2012 from a "Brain Invaders" experiment. .. 
admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2013a 25 16 6140 NT / 128 T 1s 512Hz 2 + BI2012 25 16 640 NT / 128 T 1s 128Hz 2 ================ ======= ======= ================ =============== =============== =========== Dataset following the setup from [1]_ carried-out at University of @@ -468,8 +470,9 @@ def data_path( return _bi_data_path(self, subject, path, force_update, update_path, verbose) -class bi2013a(BaseDataset): - """P300 dataset bi2013a from a "Brain Invaders" experiment. +@depreciated_alias("bi2013a", "0.7") +class BI2013a(BaseDataset): + """P300 dataset BI2013a from a "Brain Invaders" experiment. .. admonition:: Dataset summary @@ -477,7 +480,7 @@ class bi2013a(BaseDataset): ======= ======= ======= ================= =============== =============== ================= Name #Subj #Chan #Trials / class Trials length Sampling rate #Sessions ======= ======= ======= ================= =============== =============== ================= - bi2013a 24 16 3200 NT / 640 T 1s 512Hz (1-7)8 s|(8-24)1s + BI2013a 24 16 3200 NT / 640 T 1s 512Hz (1-7)8 s|(8-24)1s ======= ======= ======= ================= =============== =============== ================= Dataset following the setup from [1]_ carried-out at University of @@ -570,14 +573,15 @@ def data_path( return _bi_data_path(self, subject, path, force_update, update_path, verbose) -class bi2014a(BaseDataset): - """P300 dataset bi2014a from a "Brain Invaders" experiment. +@depreciated_alias("bi2014a", "0.7") +class BI2014a(BaseDataset): + """P300 dataset BI2014a from a "Brain Invaders" experiment. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2014a 64 16 5 NT x 1 T 1s 512Hz up to 3 + BI2014a 64 16 5 NT x 1 T 1s 512Hz up to 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings of 71 subjects @@ -586,7 +590,7 @@ class bi2014a(BaseDataset): that are flashed pseudo-randomly to elicit the P300 response. EEG data were recorded using 16 active dry electrodes with up to three game sessions. The experiment took place at GIPSA-lab, Grenoble, France, in 2014. A full description of the experiment is available - at [1]_. The ID of this dataset is bi2014a. + at [1]_. The ID of this dataset is BI2014a. :Investigators: Eng. Louis Korczowski, B. Sc. Ekaterina Ostaschenko :Technical Support: Eng. Anton Andreev, Eng. Grégoire Cattan, Eng. Pedro. L. C. Rodrigues, @@ -603,7 +607,7 @@ class bi2014a(BaseDataset): .. [1] Korczowski, L., Ostaschenko, E., Andreev, A., Cattan, G., Rodrigues, P. L. C., Gautheret, V., & Congedo, M. (2019). Brain Invaders calibration-less P300-based - BCI using dry EEG electrodes Dataset (bi2014a). + BCI using dry EEG electrodes Dataset (BI2014a). https://hal.archives-ouvertes.fr/hal-02171575 """ @@ -628,14 +632,15 @@ def data_path( return _bi_data_path(self, subject, path, force_update, update_path, verbose) -class bi2014b(BaseDataset): - """P300 dataset bi2014b from a "Brain Invaders" experiment. 
+@depreciated_alias("bi2014b", "0.7") +class BI2014b(BaseDataset): + """P300 dataset BI2014b from a "Brain Invaders" experiment. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2014b 37 32 5 NT x 1 T 1s 512Hz 3 + BI2014b 38 32 5 NT x 1 T 1s 512Hz 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings of 38 subjects playing in @@ -645,7 +650,7 @@ class bi2014b(BaseDataset): appearing about 300ms after stimulation onset. EEG data were recorded using 32 active wet electrodes per subjects (total: 64 electrodes) during three randomized conditions (Solo1, Solo2, Collaboration). The experiment took place at GIPSA-lab, Grenoble, France, in 2014. - A full description of the experiment is available at [1]_. The ID of this dataset is bi2014b. + A full description of the experiment is available at [1]_. The ID of this dataset is BI2014b. :Investigators: Eng. Louis Korczowski, B. Sc. Ekaterina Ostaschenko :Technical Support: Eng. Anton Andreev, Eng. Grégoire Cattan, Eng. Pedro. L. C. Rodrigues, @@ -662,13 +667,13 @@ class bi2014b(BaseDataset): .. [1] Korczowski, L., Ostaschenko, E., Andreev, A., Cattan, G., Rodrigues, P. L. C., Gautheret, V., & Congedo, M. (2019). Brain Invaders Solo versus Collaboration: - Multi-User P300-Based Brain-Computer Interface Dataset (bi2014b). + Multi-User P300-Based Brain-Computer Interface Dataset (BI2014b). https://hal.archives-ouvertes.fr/hal-02173958 """ def __init__(self): super().__init__( - subjects=list(range(1, 38)), + subjects=list(range(1, 39)), sessions_per_subject=1, events=dict(Target=2, NonTarget=1), code="BrainInvaders2014b", @@ -687,14 +692,15 @@ def data_path( return _bi_data_path(self, subject, path, force_update, update_path, verbose) -class bi2015a(BaseDataset): - """P300 dataset bi2015a from a "Brain Invaders" experiment. +@depreciated_alias("bi2015a", "0.7") +class BI2015a(BaseDataset): + """P300 dataset BI2015a from a "Brain Invaders" experiment. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2015a 43 32 5 NT x 1 T 1s 512Hz 3 + BI2015a 43 32 5 NT x 1 T 1s 512Hz 3 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings @@ -705,7 +711,7 @@ class bi2015a(BaseDataset): 32 active wet electrodes with three conditions: flash duration 50ms, 80ms or 110ms. The experiment took place at GIPSA-lab, Grenoble, France, in 2015. A full description of the experiment is available at [1]_. The ID of this - dataset is bi2015a. + dataset is BI2015a. :Investigators: Eng. Louis Korczowski, B. Sc. Martine Cederhout :Technical Support: Eng. Anton Andreev, Eng. Grégoire Cattan, Eng. Pedro. L. C. Rodrigues, @@ -722,7 +728,7 @@ class bi2015a(BaseDataset): .. [1] Korczowski, L., Cederhout, M., Andreev, A., Cattan, G., Rodrigues, P. L. C., Gautheret, V., & Congedo, M. (2019). 
Brain Invaders calibration-less P300-based - BCI with modulation of flash duration Dataset (bi2015a) + BCI with modulation of flash duration Dataset (BI2015a) https://hal.archives-ouvertes.fr/hal-02172347 """ @@ -747,14 +753,15 @@ def data_path( return _bi_data_path(self, subject, path, force_update, update_path, verbose) -class bi2015b(BaseDataset): - """P300 dataset bi2015b from a "Brain Invaders" experiment. +@depreciated_alias("bi2015b", "0.7") +class BI2015b(BaseDataset): + """P300 dataset BI2015b from a "Brain Invaders" experiment. .. admonition:: Dataset summary ================ ======= ======= ================ =============== =============== =========== Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions ================ ======= ======= ================ =============== =============== =========== - bi2015b 44 32 5 NT x 1 T 1s 512Hz 2 + BI2015b 44 32 5 NT x 1 T 1s 512Hz 1 ================ ======= ======= ================ =============== =============== =========== This dataset contains electroencephalographic (EEG) recordings @@ -768,7 +775,7 @@ class bi2015b(BaseDataset): Competition 2-Targets). The experiment took place at GIPSA-lab, Grenoble, France, in 2015. A full description of the experiment is available at A full description of the experiment is available at [1]_. The ID of this - dataset is bi2015a. + dataset is BI2015a. :Investigators: Eng. Louis Korczowski, B. Sc. Martine Cederhout :Technical Support: Eng. Anton Andreev, Eng. Grégoire Cattan, Eng. Pedro. L. C. Rodrigues, @@ -785,7 +792,7 @@ class bi2015b(BaseDataset): .. [1] Korczowski, L., Cederhout, M., Andreev, A., Cattan, G., Rodrigues, P. L. C., Gautheret, V., & Congedo, M. (2019). Brain Invaders Cooperative versus Competitive: - Multi-User P300-based Brain-Computer Interface Dataset (bi2015b) + Multi-User P300-based Brain-Computer Interface Dataset (BI2015b) https://hal.archives-ouvertes.fr/hal-02172347 """ @@ -810,15 +817,16 @@ def data_path( return _bi_data_path(self, subject, path, force_update, update_path, verbose) -class VirtualReality(BaseDataset): +@depreciated_alias("VirtualReality", "0.7") +class Cattan2019_VR(BaseDataset): """Dataset of an EEG-based BCI experiment in Virtual Reality using P300. .. 
admonition:: Dataset summary - ================ ======= ======= ================ =============== =============== =========== - Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions - ================ ======= ======= ================ =============== =============== =========== - VirtualReality 21 16 600 NT / 120 T 1s 512Hz 2 - ================ ======= ======= ================ =============== =============== =========== + ============== ======= ======= ================ =============== =============== =========== + Name #Subj #Chan #Trials/class Trials length Sampling Rate #Sessions + ============== ======= ======= ================ =============== =============== =========== + Cattan2019_VR 21 16 600 NT / 120 T 1s 512Hz 2 + ============== ======= ======= ================ =============== =============== =========== We describe the experimental procedures for a dataset that we have made publicly available at https://doi.org/10.5281/zenodo.2605204 in mat (Mathworks, Natick, USA) @@ -863,7 +871,7 @@ def __init__(self, virtual_reality=False, screen_display=True): subjects=list(range(1, 21 + 1)), sessions_per_subject=1, events=dict(Target=2, NonTarget=1), - code="VR-P300", + code="Cattan2019-VR", # before: "VR-P300" interval=[0, 1.0], paradigm="p300", doi="https://doi.org/10.5281/zenodo.2605204", @@ -873,7 +881,7 @@ def __init__(self, virtual_reality=False, screen_display=True): self.personal_computer = screen_display if not self.virtual_reality and not self.personal_computer: warn( - "[P300-VR dataset] virtual_reality and screen display are False. No data will be downloaded, unless you change these parameters after initialization." + "[Cattan2019-VR dataset] virtual_reality and screen display are False. No data will be downloaded, unless you change these parameters after initialization." 
) def _get_single_subject_data(self, subject): diff --git a/moabb/datasets/compound_dataset/__init__.py b/moabb/datasets/compound_dataset/__init__.py index c7a6ffecd..6232e2129 100644 --- a/moabb/datasets/compound_dataset/__init__.py +++ b/moabb/datasets/compound_dataset/__init__.py @@ -1,10 +1,23 @@ # flake8: noqa from .base import CompoundDataset from .bi_illiteracy import ( - VirtualReality_il, - bi2014a_il, - bi2014b_il, - bi2015a_il, - bi2015b_il, - biIlliteracy, + BI2014a_Il, + BI2014b_Il, + BI2015a_Il, + BI2015b_Il, + BI_Il, + Cattan2019_VR_Il, ) +from .utils import _init_compound_dataset_list + + +_init_compound_dataset_list() +del _init_compound_dataset_list + +# Depreciated datasets (not added to dataset_list): +from .bi_illiteracy import VirtualReality_il # noqa: F401 +from .bi_illiteracy import bi2014a_il # noqa: F401 +from .bi_illiteracy import bi2014b_il # noqa: F401 +from .bi_illiteracy import bi2015a_il # noqa: F401 +from .bi_illiteracy import bi2015b_il # noqa: F401 +from .bi_illiteracy import biIlliteracy # noqa: F401 diff --git a/moabb/datasets/compound_dataset/bi_illiteracy.py b/moabb/datasets/compound_dataset/bi_illiteracy.py index f973b75cb..ef5725f80 100644 --- a/moabb/datasets/compound_dataset/bi_illiteracy.py +++ b/moabb/datasets/compound_dataset/bi_illiteracy.py @@ -1,10 +1,15 @@ -from ..braininvaders import VirtualReality, bi2014a, bi2014b, bi2015a, bi2015b +from moabb.utils import depreciated_alias + +from ..braininvaders import BI2014a, BI2014b, BI2015a, BI2015b, Cattan2019_VR from .base import CompoundDataset class _base_bi_il(CompoundDataset): - def __init__(self, subjects_list, dataset=None): - code = "Illiteracy" if dataset is None else f"{dataset.code}+IL" + def __init__(self, subjects_list, dataset=None, code=None): + if code is None and dataset is None: + raise ValueError("Either code or dataset must be provided") + if code is None: + code = f"{dataset.code}-Il" CompoundDataset.__init__( self, subjects_list=subjects_list, @@ -15,13 +20,14 @@ def __init__(self, subjects_list, dataset=None): ) -class bi2014a_il(_base_bi_il): - """A selection of subject from bi2014a with AUC < 0.7 with pipeline: +@depreciated_alias("bi2014a_il", "0.7") +class BI2014a_Il(_base_bi_il): + """A selection of subject from BI2014a with AUC < 0.7 with pipeline: ERPCovariances(estimator="lwf"), MDM(metric="riemann") """ def __init__(self): - dataset = bi2014a() + dataset = BI2014a() subjects_list = [ (dataset, 4, None, None), (dataset, 7, None, None), @@ -44,13 +50,14 @@ def __init__(self): _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) -class bi2014b_il(_base_bi_il): - """A selection of subject from bi2014b with AUC < 0.7 with pipeline: +@depreciated_alias("bi2014b_il", "0.7") +class BI2014b_Il(_base_bi_il): + """A selection of subject from BI2014b with AUC < 0.7 with pipeline: ERPCovariances(estimator="lwf"), MDM(metric="riemann") """ def __init__(self): - dataset = bi2014b() + dataset = BI2014b() subjects_list = [ (dataset, 2, None, None), (dataset, 7, None, None), @@ -67,13 +74,14 @@ def __init__(self): _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) -class bi2015a_il(_base_bi_il): - """A selection of subject from bi2015a with AUC < 0.7 with pipeline: +@depreciated_alias("bi2015a_il", "0.7") +class BI2015a_Il(_base_bi_il): + """A selection of subject from BI2015a with AUC < 0.7 with pipeline: ERPCovariances(estimator="lwf"), MDM(metric="riemann") """ def __init__(self): - dataset = bi2015a() + dataset = BI2015a() subjects_list = 
[ (dataset, 1, ["session_1", "session_2", "session_3"], None), (dataset, 39, ["session_2", "session_3"], None), @@ -81,13 +89,14 @@ def __init__(self): _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) -class bi2015b_il(_base_bi_il): - """A selection of subject from bi2015b with AUC < 0.7 with pipeline: +@depreciated_alias("bi2015b_il", "0.7") +class BI2015b_Il(_base_bi_il): + """A selection of subject from BI2015b with AUC < 0.7 with pipeline: ERPCovariances(estimator="lwf"), MDM(metric="riemann") """ def __init__(self): - dataset = bi2015b() + dataset = BI2015b() subjects_list = [ (dataset, 2, None, None), (dataset, 4, None, None), @@ -118,13 +127,14 @@ def __init__(self): _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) -class VirtualReality_il(_base_bi_il): - """A selection of subject from VirtualReality with AUC < 0.7 with pipeline: +@depreciated_alias("VirtualReality_il", "0.7") +class Cattan2019_VR_Il(_base_bi_il): + """A selection of subject from Cattan2019_VR with AUC < 0.7 with pipeline: ERPCovariances(estimator="lwf"), MDM(metric="riemann") """ def __init__(self): - dataset = VirtualReality(virtual_reality=True, screen_display=True) + dataset = Cattan2019_VR(virtual_reality=True, screen_display=True) subjects_list = [ (dataset, 4, None, None), (dataset, 10, None, None), @@ -134,17 +144,18 @@ def __init__(self): _base_bi_il.__init__(self, subjects_list=subjects_list, dataset=dataset) -class biIlliteracy(_base_bi_il): +@depreciated_alias("biIlliteracy", "0.7") +class BI_Il(_base_bi_il): """Subjects from braininvaders datasets with AUC < 0.7 with pipeline: ERPCovariances(estimator="lwf"), MDM(metric="riemann") """ def __init__(self): subjects_list = [ - bi2014a_il(), - bi2014b_il(), - bi2015a_il(), - bi2015b_il(), - VirtualReality_il(), + BI2014a_Il(), + BI2014b_Il(), + BI2015a_Il(), + BI2015b_Il(), + Cattan2019_VR_Il(), ] - _base_bi_il.__init__(self, subjects_list=subjects_list) + _base_bi_il.__init__(self, subjects_list=subjects_list, code="BrainInvaders-Il") diff --git a/moabb/datasets/compound_dataset/utils.py b/moabb/datasets/compound_dataset/utils.py new file mode 100644 index 000000000..f41e0914c --- /dev/null +++ b/moabb/datasets/compound_dataset/utils.py @@ -0,0 +1,13 @@ +import inspect + +import moabb.datasets.compound_dataset as db +from moabb.datasets.compound_dataset.base import CompoundDataset + + +compound_dataset_list = [] + + +def _init_compound_dataset_list(): + for ds in inspect.getmembers(db, inspect.isclass): + if issubclass(ds[1], CompoundDataset) and not ds[0] == "CompoundDataset": + compound_dataset_list.append(ds[1]) diff --git a/moabb/datasets/epfl.py b/moabb/datasets/epfl.py index 399a23698..6a4da0c47 100644 --- a/moabb/datasets/epfl.py +++ b/moabb/datasets/epfl.py @@ -75,7 +75,7 @@ def __init__(self): subjects=[1, 2, 3, 4, 6, 7, 8, 9], sessions_per_subject=4, events=dict(Target=2, NonTarget=1), - code="EPFL-P300", + code="EPFLP300", interval=[0, 1], paradigm="p300", doi="10.1016/j.jneumeth.2007.03.005", diff --git a/moabb/datasets/fake.py b/moabb/datasets/fake.py index 8e0fc0ce3..e4e0b62f2 100644 --- a/moabb/datasets/fake.py +++ b/moabb/datasets/fake.py @@ -7,7 +7,7 @@ from mne.io import RawArray from moabb.datasets.base import BaseDataset -from moabb.datasets.braininvaders import VirtualReality +from moabb.datasets.braininvaders import Cattan2019_VR from moabb.datasets.utils import block_rep @@ -48,8 +48,9 @@ def __init__( event_id = {ev: ii + 1 for ii, ev in enumerate(event_list)} self.channels = channels 
code = ( - f"{code}-{paradigm.capitalize()}-{n_subjects}-{n_sessions}-{n_runs}-" - f"{''.join([e.replace('_', '').capitalize() for e in event_list])}-{''.join(channels)}" + f"{code}-{paradigm.lower()}-{n_subjects}-{n_sessions}-{n_runs}-" + f"{''.join([e.replace('_', '').lower() for e in event_list])}-" + f"{''.join([c.lower() for c in channels])}" ) super().__init__( subjects=list(range(1, n_subjects + 1)), @@ -101,7 +102,7 @@ def data_path( class FakeVirtualRealityDataset(FakeDataset): - """Fake VirtualReality dataset for test purpose. + """Fake Cattan2019_VR dataset for test purpose. .. versionadded:: 0.5.0 """ @@ -144,7 +145,7 @@ def get_block_repetition(self, paradigm, subjects, block_list, repetition_list): See also -------- BaseDataset.get_data - VirtualReality.get_block_repetition + Cattan2019_VR.get_block_repetition Parameters ---------- @@ -160,6 +161,6 @@ def get_block_repetition(self, paradigm, subjects, block_list, repetition_list): data: Dict dict containing the raw data """ - return VirtualReality.get_block_repetition( + return Cattan2019_VR.get_block_repetition( self, paradigm, subjects, block_list, repetition_list ) diff --git a/moabb/datasets/huebner_llp.py b/moabb/datasets/huebner_llp.py index b96847b4c..842257547 100644 --- a/moabb/datasets/huebner_llp.py +++ b/moabb/datasets/huebner_llp.py @@ -172,7 +172,7 @@ def __init__(self, interval=None, raw_slice_offset=None, use_blocks_as_sessions= raw_slice_offset=raw_slice_offset, n_subjects=13, sessions_per_subject=1, # if varying, take minimum - code="VisualSpellerLLP", + code="Huebner2017", # Before: "VisualSpellerLLP" interval=interval, doi=llp_speller_paper_doi, use_blocks_as_sessions=use_blocks_as_sessions, @@ -231,7 +231,7 @@ def __init__(self, interval=None, raw_slice_offset=None, use_blocks_as_sessions= raw_slice_offset=raw_slice_offset, n_subjects=12, sessions_per_subject=1, # if varying, take minimum - code="VisualSpellerMIX", + code="Huebner2018", # Before: "VisualSpellerMIX" interval=interval, doi=mix_speller_paper_doi, use_blocks_as_sessions=use_blocks_as_sessions, diff --git a/moabb/datasets/mpi_mi.py b/moabb/datasets/mpi_mi.py index c1df63ed2..1e165aa42 100644 --- a/moabb/datasets/mpi_mi.py +++ b/moabb/datasets/mpi_mi.py @@ -5,22 +5,24 @@ from moabb.datasets import download as dl from moabb.datasets.base import BaseDataset +from moabb.utils import depreciated_alias DOWNLOAD_URL = "https://zenodo.org/record/1217449/files/" -class MunichMI(BaseDataset): +@depreciated_alias("MunichMI", "0.7") +class GrosseWentrup2009(BaseDataset): """Munich Motor Imagery dataset. .. admonition:: Dataset summary - ======== ======= ======= ========== ================= ============ =============== =========== - Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions - ======== ======= ======= ========== ================= ============ =============== =========== - MunichMI 10 128 2 150 7s 500Hz 1 - ======== ======= ======= ========== ================= ============ =============== =========== + ================= ======= ======= ========== ================= ============ =============== =========== + Name #Subj #Chan #Classes #Trials / class Trials len Sampling rate #Sessions + ================= ======= ======= ========== ================= ============ =============== =========== + GrosseWentrup2009 10 128 2 150 7s 500Hz 1 + ================= ======= ======= ========== ================= ============ =============== =========== Motor imagery dataset from Grosse-Wentrup et al. 2009 [1]_. 
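A minimal sketch of what the renaming above means for user code, assuming a MOABB build that includes these commits: both names are exported from ``moabb.datasets``, and the old one keeps working but goes through the ``depreciated_alias`` machinery and logs a warning.

    from moabb.datasets import GrosseWentrup2009, MunichMI

    new = GrosseWentrup2009()  # new name, no warning
    old = MunichMI()           # logs: "MunichMI has been renamed to GrosseWentrup2009.
                               #        MunichMI will be removed in version 0.7."

    # The alias is a thin subclass of the renamed class, so both objects are
    # interchangeable and carry the same dataset code.
    assert isinstance(old, GrosseWentrup2009)
    assert old.code == new.code

The same pattern applies to every ``@depreciated_alias``-decorated dataset in this patch, e.g. ``bi2015a``/``BI2015a``, ``HeadMountedDisplay``/``Cattan2019_PHMD`` and ``SSVEPExo``/``Kalunga2016``.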
diff --git a/moabb/datasets/neiry.py b/moabb/datasets/neiry.py index 0b7afb7f8..67912b49c 100644 --- a/moabb/datasets/neiry.py +++ b/moabb/datasets/neiry.py @@ -110,7 +110,7 @@ def __init__(self): subjects=list(range(60)), sessions_per_subject=1, events={"Target": 1, "NonTarget": 2}, - code="Demons-P300", + code="DemonsP300", interval=[0, 1], paradigm="p300", ) diff --git a/moabb/datasets/phmd_ml.py b/moabb/datasets/phmd_ml.py index fb369680b..667ed1ba6 100644 --- a/moabb/datasets/phmd_ml.py +++ b/moabb/datasets/phmd_ml.py @@ -4,6 +4,8 @@ import numpy as np from scipy.io import loadmat +from moabb.utils import depreciated_alias + from . import download as dl from .base import BaseDataset @@ -11,17 +13,18 @@ HEADMOUNTED_URL = "https://zenodo.org/record/2617085/files/" -class HeadMountedDisplay(BaseDataset): +@depreciated_alias("HeadMountedDisplay", "0.7") +class Cattan2019_PHMD(BaseDataset): """Passive Head Mounted Display with Music Listening dataset. .. admonition:: Dataset summary - ================= ======= ======= ========== ================= ============ =============== =========== - Name #Subj #Chan #Classes #Blocks/class Trials len Sampling rate #Sessions - ================== ======= ======= ========== ================= ============ =============== =========== - HeadMountedDisplay 12 16 2 10 60s 512Hz 1 - ================== ======= ======= ========== ================= ============ =============== =========== + ============== ======= ======= ========== ================= ============ =============== =========== + Name #Subj #Chan #Classes #Blocks/class Trials len Sampling rate #Sessions + =============== ======= ======= ========== ================= ============ =============== =========== + Cattan2019_PHMD 12 16 2 10 60s 512Hz 1 + =============== ======= ======= ========== ================= ============ =============== =========== We describe the experimental procedures for a dataset that we have made publicly available at https://doi.org/10.5281/zenodo.2617084 in mat (Mathworks, Natick, USA) and csv formats. 
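Beyond the runtime alias, each rename is also recorded in ``moabb.utils.aliases_list`` as an ``(old_name, new_name, expire_version)`` tuple (see the decorator and its tests further down in this patch). A short sketch of querying that registry, assuming ``moabb.datasets`` has been imported so the decorated classes are registered:

    import moabb.datasets  # importing the package applies the decorators
    from moabb.utils import aliases_list

    renames = {old: new for old, new, _ in aliases_list}
    print(renames.get("HeadMountedDisplay"))  # -> "Cattan2019_PHMD"
    print(renames.get("MunichMI"))            # -> "GrosseWentrup2009"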
@@ -64,7 +67,7 @@ def __init__(self): subjects=list(range(1, 12 + 1)), sessions_per_subject=1, events=dict(on=1, off=2), - code="PHMD-ML", + code="Cattan2019-PHMD", # Before: "PHMD-ML" interval=[0, 1], paradigm="rstate", doi="https://doi.org/10.5281/zenodo.2617084 ", diff --git a/moabb/datasets/sosulski2019.py b/moabb/datasets/sosulski2019.py index 84a97956e..7f0ebb3e5 100644 --- a/moabb/datasets/sosulski2019.py +++ b/moabb/datasets/sosulski2019.py @@ -25,7 +25,7 @@ class Sosulski2019(BaseDataset): ============= ======= ======= ================= =============== =============== =========== Name #Subj #Chan #Trials / class Trials length Sampling rate #Sessions ============= ======= ======= ================= =============== =============== =========== - Sosulski2019 13 31 75 NT / 15 T 1000Hz 3 + Sosulski2019 13 31 75 NT / 15 T 1.2s 1000Hz 3 ============= ======= ======= ================= =============== =============== =========== **Dataset description** @@ -104,7 +104,7 @@ def __init__( self.n_channels = 31 self.use_soas_as_sessions = use_soas_as_sessions self.description_map = {"Stimulus/S 21": "Target", "Stimulus/S 1": "NonTarget"} - code = "SpotPilot-P300" + code = "Sosulski2019" interval = [-0.2, 1] if interval is None else interval super().__init__( subjects=list(range(1, 13 + 1)), diff --git a/moabb/datasets/ssvep_exo.py b/moabb/datasets/ssvep_exo.py index 7deb23c54..2993f4d2a 100644 --- a/moabb/datasets/ssvep_exo.py +++ b/moabb/datasets/ssvep_exo.py @@ -2,6 +2,8 @@ from mne.io import Raw +from moabb.utils import depreciated_alias + from . import download as dl from .base import BaseDataset @@ -9,17 +11,18 @@ SSVEPEXO_URL = "https://zenodo.org/record/2392979/files/" -class SSVEPExo(BaseDataset): +@depreciated_alias("SSVEPExo", "0.7") +class Kalunga2016(BaseDataset): """SSVEP Exo dataset. .. admonition:: Dataset summary - ======== ======= ======= ========== ================= =============== =============== =========== - Name #Subj #Chan #Classes #Trials / class Trials length Sampling rate #Sessions - ======== ======= ======= ========== ================= =============== =============== =========== - SSVEPExo 12 8 4 16 2s 256Hz 1 - ======== ======= ======= ========== ================= =============== =============== =========== + =========== ======= ======= ========== ================= =============== =============== =========== + Name #Subj #Chan #Classes #Trials / class Trials length Sampling rate #Sessions + =========== ======= ======= ========== ================= =============== =============== =========== + Kalunga2016 12 8 4 16 2s 256Hz 1 + =========== ======= ======= ========== ================= =============== =============== =========== SSVEP dataset from E. Kalunga PhD in University of Versailles [1]_. 
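The new class and ``code`` names introduced in these hunks ("Cattan2019-PHMD", "Sosulski2019", "Kalunga2016", ...) all follow the CamelCase/kebab-case convention that the test suite checks with ``is_camel_kebab_case`` (imported from ``moabb.datasets.base`` in ``moabb/tests/datasets.py`` below). Its implementation is not part of this patch; a rough, illustrative check that is consistent with those test cases, though not necessarily identical to the real helper, could look like:

    import re

    def is_camel_kebab_case_sketch(name: str) -> bool:
        # Letters and digits, optionally separated by single hyphens:
        # no spaces and no underscores (mirrors the accept/reject cases in
        # moabb/tests/datasets.py; the actual helper may be stricter).
        return re.fullmatch(r"[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*", name) is not None

    assert is_camel_kebab_case_sketch("Cattan2019-PHMD")
    assert is_camel_kebab_case_sketch("Kalunga2016")
    assert not is_camel_kebab_case_sketch("PHMD ML")  # spaces rejected
    assert not is_camel_kebab_case_sketch("PHMD_ML")  # underscores rejected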
@@ -60,7 +63,7 @@ def __init__(self): subjects=list(range(1, 13)), sessions_per_subject=1, events={"13": 2, "17": 3, "21": 4, "rest": 1}, - code="Exoskeleton-SSVEP", + code="Kalunga2016", interval=[2, 4], paradigm="ssvep", doi="10.1016/j.neucom.2016.01.007", diff --git a/moabb/datasets/ssvep_mamem.py b/moabb/datasets/ssvep_mamem.py index c90cc0671..1c20574b8 100644 --- a/moabb/datasets/ssvep_mamem.py +++ b/moabb/datasets/ssvep_mamem.py @@ -109,13 +109,13 @@ def _get_single_subject_data(self, subject): if fnamed[4] == "x": continue session_name = "session_0" - if self.code == "MAMEM3-SSVEP": + if self.code == "MAMEM3": repetition = len(fnamed) - 10 run_name = f"run_{(ord(fnamed[4]) - 97) * 2 + repetition}" else: run_name = f"run_{ord(fnamed[4]) - 97}" - if self.code == "MAMEM3-SSVEP": + if self.code == "MAMEM3": m = loadmat(fpath) ch_names = [e[0] for e in m["info"][0, 0][9][0]] sfreq = 128 @@ -126,7 +126,7 @@ def _get_single_subject_data(self, subject): ch_names = [f"E{i + 1}" for i in range(0, 256)] ch_names.append("stim") sfreq = 250 - if self.code == "MAMEM2-SSVEP": + if self.code == "MAMEM2": labels = m["labels"] else: labels = None @@ -281,7 +281,7 @@ def __init__(self): events={"6.66": 1, "7.50": 2, "8.57": 3, "10.00": 4, "12.00": 5}, sessions_per_subject=1, # 5 runs per sessions, except 3 for S001, S003, S008, 4 for S004 - code="MAMEM1-SSVEP", + code="MAMEM1", doi="https://arxiv.org/abs/1602.00904", figshare_id=2068677, ) @@ -374,7 +374,7 @@ def __init__(self): super().__init__( events={"6.66": 1, "7.50": 2, "8.57": 3, "10.00": 4, "12.00": 5}, sessions_per_subject=1, - code="MAMEM2-SSVEP", + code="MAMEM2", doi="https://arxiv.org/abs/1602.00904", figshare_id=3153409, ) @@ -482,7 +482,7 @@ def __init__(self): "12.00": 33025, }, sessions_per_subject=1, - code="MAMEM3-SSVEP", + code="MAMEM3", doi="https://arxiv.org/abs/1602.00904", figshare_id=3413851, ) diff --git a/moabb/datasets/ssvep_nakanishi.py b/moabb/datasets/ssvep_nakanishi.py index 058006e94..0d172e448 100644 --- a/moabb/datasets/ssvep_nakanishi.py +++ b/moabb/datasets/ssvep_nakanishi.py @@ -61,7 +61,7 @@ def __init__(self): "12.75": 11, "14.75": 12, }, - code="Nakanishi-SSVEP", + code="Nakanishi2015", interval=[0.15, 4.3], paradigm="ssvep", doi="doi.org/10.1371/journal.pone.0140703", diff --git a/moabb/datasets/ssvep_wang.py b/moabb/datasets/ssvep_wang.py index 572874a15..faf8fd6a6 100644 --- a/moabb/datasets/ssvep_wang.py +++ b/moabb/datasets/ssvep_wang.py @@ -114,7 +114,7 @@ def __init__(self): subjects=list(range(1, 35)), sessions_per_subject=1, events=self._events, - code="Wang-SSVEP", + code="Wang2016", interval=[0.5, 5.5], paradigm="ssvep", doi="doi://10.1109/TNSRE.2016.2627556", diff --git a/moabb/tests/benchmark.py b/moabb/tests/benchmark.py index 7ac549875..c838c484f 100644 --- a/moabb/tests/benchmark.py +++ b/moabb/tests/benchmark.py @@ -21,9 +21,9 @@ def test_benchmark_strdataset(self): pipelines=str(self.pp_dir), evaluations=["WithinSession"], include_datasets=[ - "FakeDataset-Imagery-10-2-2-LefthandRighthand-C3CzC4", - "FakeDataset-P300-10-2-2-TargetNontarget-C3CzC4", - "FakeDataset-Ssvep-10-2-2-1315-C3CzC4", + "FakeDataset-imagery-10-2-2-lefthandrighthand-c3czc4", + "FakeDataset-p300-10-2-2-targetnontarget-c3czc4", + "FakeDataset-ssvep-10-2-2-1315-c3czc4", ], overwrite=True, ) diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py index ea64feb53..ae4457945 100644 --- a/moabb/tests/datasets.py +++ b/moabb/tests/datasets.py @@ -1,4 +1,5 @@ import inspect +import logging import shutil import tempfile 
import unittest @@ -6,12 +7,15 @@ import mne import moabb.datasets as db -from moabb.datasets import Shin2017A, Shin2017B, VirtualReality -from moabb.datasets.base import BaseDataset +import moabb.datasets.compound_dataset as db_compound +from moabb.datasets import Cattan2019_VR, Shin2017A, Shin2017B +from moabb.datasets.base import BaseDataset, is_abbrev, is_camel_kebab_case from moabb.datasets.compound_dataset import CompoundDataset +from moabb.datasets.compound_dataset.utils import compound_dataset_list from moabb.datasets.fake import FakeDataset, FakeVirtualRealityDataset from moabb.datasets.utils import block_rep, dataset_list from moabb.paradigms import P300 +from moabb.utils import aliases_list _ = mne.set_log_level("CRITICAL") @@ -33,6 +37,32 @@ def _run_tests_on_dataset(d): print(d.event_id) +class TestRegex(unittest.TestCase): + def test_is_abbrev(self): + assert is_abbrev("a", "a-") + assert is_abbrev("a", "a0") + assert is_abbrev("a", "ab") + assert not is_abbrev("a", "aA") + assert not is_abbrev("a", "Aa") + assert not is_abbrev("a", "-a") + assert not is_abbrev("a", "0a") + assert not is_abbrev("a", "ba") + assert not is_abbrev("a", "a ") + + def test_is_camell_kebab_case(self): + assert is_camel_kebab_case("Aa") + assert is_camel_kebab_case("aAa") + assert is_camel_kebab_case("Aa-a") + assert is_camel_kebab_case("1Aa-1a1") + assert is_camel_kebab_case("AB") + assert not is_camel_kebab_case("A ") + assert not is_camel_kebab_case(" A") + assert not is_camel_kebab_case("A A") + assert not is_camel_kebab_case("A_") + assert not is_camel_kebab_case("_A") + assert not is_camel_kebab_case("A_A") + + class Test_Datasets(unittest.TestCase): def test_fake_dataset(self): """This test will insure the basedataset works.""" @@ -144,28 +174,64 @@ def test_cache_dataset(self): def test_dataset_accept(self): """Verify that accept licence is working.""" - # Only Shin2017 (bbci_eeg_fnirs) for now + # Only BaseShin2017 (bbci_eeg_fnirs) for now for ds in [Shin2017A(), Shin2017B()]: # if the data is already downloaded: if mne.get_config("MNE_DATASETS_BBCIFNIRS_PATH") is None: self.assertRaises(AttributeError, ds.get_data, [1]) def test_datasets_init(self): + codes = [] + logger = logging.getLogger("moabb.datasets.base") for ds in dataset_list: kwargs = {} if inspect.signature(ds).parameters.get("accept"): kwargs["accept"] = True - self.assertIsNotNone(ds(**kwargs)) + with self.assertLogs(logger="moabb.datasets.base", level="WARNING") as cm: + # We test if the is_abrev does not throw a warning. + # Trick needed because assertNoLogs only inrtoduced in python 3.10: + logger.warning(f"Testing {ds.__name__}") + obj = ds(**kwargs) + self.assertEqual(len(cm.output), 1) + self.assertIsNotNone(obj) + codes.append(obj.code) + + # Check that all codes are unique: + self.assertEqual(len(codes), len(set(codes))) + + def test_depreciated_datasets_init(self): + depreciated_names, _, _ = zip(*aliases_list) + for ds in db.__dict__.values(): + if ds in dataset_list: + continue + if not (inspect.isclass(ds) and issubclass(ds, BaseDataset)): + continue + kwargs = {} + if inspect.signature(ds).parameters.get("accept"): + kwargs["accept"] = True + with self.assertLogs(logger="moabb.utils", level="WARNING"): + # We test if depreciated_alias throws a warning. 
+ obj = ds(**kwargs) + self.assertIsNotNone(obj) + self.assertIn(ds.__name__, depreciated_names) def test_dataset_list(self): - all_datasets = len( - [ - issubclass(c, BaseDataset) - for c in db.__dict__.values() - if inspect.isclass(c) - ] - ) - assert len(dataset_list) == all_datasets + if aliases_list: + depreciated_list, _, _ = zip(*aliases_list) + else: + depreciated_list = [] + all_datasets = [ + c + for c in db.__dict__.values() + if ( + inspect.isclass(c) + and issubclass(c, BaseDataset) + and c.__name__ not in depreciated_list + ) + ] + + assert len(dataset_list) == len(all_datasets) + assert set(dataset_list) == set(all_datasets) class Test_VirtualReality_Dataset(unittest.TestCase): @@ -173,14 +239,14 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def test_canary(self): - assert VirtualReality() is not None + assert Cattan2019_VR() is not None def test_warning_if_parameters_false(self): with self.assertWarns(UserWarning): - VirtualReality(virtual_reality=False, screen_display=False) + Cattan2019_VR(virtual_reality=False, screen_display=False) def test_data_path(self): - ds = VirtualReality(virtual_reality=True, screen_display=True) + ds = Cattan2019_VR(virtual_reality=True, screen_display=True) data_path = ds.data_path(1) assert len(data_path) == 2 assert "subject_01_VR.mat" in data_path[0] @@ -220,7 +286,7 @@ def test_fake_dataset(self): compound_data = CompoundDataset( subjects_list, events=dict(Target=2, NonTarget=1), - code="CompoundTest", + code="CompoundDataset-test", interval=[0, 1], paradigm=self.paradigm, ) @@ -249,7 +315,7 @@ def test_compound_dataset_composition(self): compound_dataset = CompoundDataset( subjects_list, events=dict(Target=2, NonTarget=1), - code="D1", + code="CompoundDataset-test", interval=[0, 1], paradigm=self.paradigm, ) @@ -259,7 +325,7 @@ def test_compound_dataset_composition(self): compound_data = CompoundDataset( subjects_list, events=dict(Target=2, NonTarget=1), - code="CompoundTest", + code="CompoundDataset-test", interval=[0, 1], paradigm=self.paradigm, ) @@ -283,10 +349,41 @@ def test_get_sessions_per_subject(self): compound_dataset = CompoundDataset( subjects_list, events=dict(Target=2, NonTarget=1), - code="CompoundTest", + code="CompoundDataset", interval=[0, 1], paradigm=self.paradigm, ) # Test private method _get_sessions_per_subject returns the minimum number of sessions per subjects self.assertEqual(compound_dataset._get_sessions_per_subject(), self.n_sessions) + + def test_datasets_init(self): + codes = [] + for ds in compound_dataset_list: + kwargs = {} + if inspect.signature(ds).parameters.get("accept"): + kwargs["accept"] = True + obj = ds(**kwargs) + self.assertIsNotNone(obj) + codes.append(obj.code) + + # Check that all codes are unique: + self.assertEqual(len(codes), len(set(codes))) + + def test_dataset_list(self): + if aliases_list: + depreciated_list, _, _ = zip(*aliases_list) + else: + depreciated_list = [] + all_datasets = [ + c + for c in db_compound.__dict__.values() + if ( + inspect.isclass(c) + and issubclass(c, CompoundDataset) + and c.__name__ not in depreciated_list + and c.__name__ != "CompoundDataset" + ) + ] + assert len(compound_dataset_list) == len(all_datasets) + assert set(compound_dataset_list) == set(all_datasets) diff --git a/moabb/tests/download.py b/moabb/tests/download.py index e9dd33731..294c98d2c 100644 --- a/moabb/tests/download.py +++ b/moabb/tests/download.py @@ -3,23 +3,23 @@ import mne -from moabb.datasets.bbci_eeg_fnirs import Shin2017 +from 
moabb.datasets.bbci_eeg_fnirs import BaseShin2017 # from moabb.datasets.gigadb import Cho2017 # from moabb.datasets.alex_mi import AlexMI # from moabb.datasets.physionet_mi import PhysionetMI -# from moabb.datasets.bnci import (BNCI2014001, BNCI2014002, BNCI2014004, -# BNCI2014008, BNCI2014009, BNCI2015001, -# BNCI2015003, BNCI2015004) +# from moabb.datasets.bnci import (BNCI2014_001, BNCI2014_002, BNCI2014_004, +# BNCI2014_008, BNCI2014_009, BNCI2015_001, +# BNCI2015_003, BNCI2015_004) # from moabb.datasets.bbci_eeg_fnirs import Shin2017A, Shin2017B # from moabb.datasets.upper_limb import Ofner2017 -# from moabb.datasets.mpi_mi import MunichMI +# from moabb.datasets.mpi_mi import GrosseWentrup2009 # from moabb.datasets.schirrmeister2017 import Schirrmeister2017 # from moabb.datasets.Weibo2014 import Weibo2014 # from moabb.datasets.Zhou2016 import Zhou2016 -# from moabb.datasets.ssvep_exo import SSVEPExo -# from moabb.datasets.braininvaders import bi2013a +# from moabb.datasets.ssvep_exo import Kalunga2016 +# from moabb.datasets.braininvaders import BI2013a # from moabb.datasets.epfl import EPFLP300 # from moabb.datasets.Lee2019 import Lee2019_MI # from moabb.datasets.neiry import DemonsP300 @@ -39,7 +39,7 @@ def _get_events(raw): events, _ = mne.events_from_annotations(raw, verbose=False) return events - if isinstance(dataset(), Shin2017): + if isinstance(dataset(), BaseShin2017): obj = dataset(accept=True) else: obj = dataset() @@ -72,14 +72,14 @@ def _get_events(raw): # self.run_dataset(Cho2017) # def test_bnci(self): - # self.run_dataset(BNCI2014001) - # self.run_dataset(BNCI2014002) - # self.run_dataset(BNCI2014004) - # self.run_dataset(BNCI2014008) - # self.run_dataset(BNCI2014009) - # self.run_dataset(BNCI2015001) - # self.run_dataset(BNCI2015003) - # self.run_dataset(BNCI2015004) + # self.run_dataset(BNCI2014_001) + # self.run_dataset(BNCI2014_002) + # self.run_dataset(BNCI2014_004) + # self.run_dataset(BNCI2014_008) + # self.run_dataset(BNCI2014_009) + # self.run_dataset(BNCI2015_001) + # self.run_dataset(BNCI2015_003) + # self.run_dataset(BNCI2015_004) # def test_alexmi(self): # self.run_dataset(AlexMI) @@ -95,7 +95,7 @@ def _get_events(raw): # self.run_dataset(Ofner2017) # def test_mpi_mi(self): - # self.run_dataset(MunichMI) + # self.run_dataset(GrosseWentrup2009) # def test_schirrmeister2017(self): # self.run_dataset(Schirrmeister2017, subj=(0, 1)) @@ -107,10 +107,10 @@ def _get_events(raw): # self.run_dataset(Zhou2016) # def test_ssvep_exo(self): - # self.run_dataset(SSVEPExo) + # self.run_dataset(Kalunga2016) # def test_bi2013a(self): - # self.run_dataset(bi2013a) + # self.run_dataset(BI2013a) # def test_epflp300(self): # self.run_dataset(EPFLP300) diff --git a/moabb/tests/util_tests.py b/moabb/tests/util_tests.py index 79ca985bf..92edc8730 100644 --- a/moabb/tests/util_tests.py +++ b/moabb/tests/util_tests.py @@ -5,7 +5,7 @@ from mne import get_config from moabb.datasets import utils -from moabb.utils import set_download_dir, setup_seed +from moabb.utils import aliases_list, depreciated_alias, set_download_dir, setup_seed class TestDownload(unittest.TestCase): @@ -91,7 +91,111 @@ def test_without_torch(self, mock_print): @patch.dict("sys.modules", {"tensorflow": MagicMock(), "torch": MagicMock()}) def test_with_tensorflow_and_torch(self): # Test when tensorflow and torch are installed - self.assertTrue(setup_seed(42) == None) # noqa: E711 + self.assertTrue(setup_seed(42) is None) # noqa: E71 + + +class TestDepreciatedAlias(unittest.TestCase): + def test_class_alias(self): + 
@depreciated_alias("DummyB", expire_version="0.1") + class DummyA: + """DummyA class""" + + def __init__(self, a, b=1): + self.a = a + self.b = b + + def c(self): + return self.a + + self.assertIn(("DummyB", "DummyA", "0.1"), aliases_list) + + with self.assertNoLogs(logger="moabb.utils", level="WARN") as cm: + a = DummyA(2, b=2) + self.assertEqual( + a.__doc__, + "DummyA class\n\n Notes\n -----\n\n" + " .. note:: ``DummyA`` was previously named ``DummyB``. " + "``DummyB`` will be removed in version 0.1.\n", + ) + + with self.assertLogs(logger="moabb.utils", level="WARN") as cm: + b = DummyB(2, b=2) # noqa: F821 + + self.assertEqual(1, len(cm.output)) + expected = ( + "DummyB has been renamed to DummyA. DummyB will be removed in version 0.1." + ) + self.assertRegex(cm.output[0], expected) + # attributes: + self.assertEqual(b.a, 2) + self.assertEqual(b.b, 2) + # methods: + self.assertEqual(b.c(), 2) + # class name and type: + self.assertEqual(DummyB.__name__, "DummyB") # noqa: F821 + self.assertEqual(b.__class__.__name__, "DummyB") + self.assertIsInstance(b, DummyB) # noqa: F821 + self.assertIsInstance(b, DummyA) + + def test_class_alias_notes(self): + @depreciated_alias("DummyB", expire_version="0.1") + class DummyA: + """DummyA class + + Notes + ----- + + a note""" + + def __init__(self, a, b=1): + self.a = a + self.b = b + + def c(self): + return self.a + + self.assertIn(("DummyB", "DummyA", "0.1"), aliases_list) + + with self.assertNoLogs(logger="moabb.utils", level="WARN"): + a = DummyA(2, b=2) + self.assertEqual( + a.__doc__, + "DummyA class\n\n Notes\n -----\n\n" + " .. note:: ``DummyA`` was previously named ``DummyB``. " + "``DummyB`` will be removed in version 0.1.\n\n" + " a note", + ) + + def test_function_alias(self): + @depreciated_alias("dummy_b", expire_version="0.1") + def dummy_a(a, b=1): + """Dummy function""" + return a + b + + self.assertIn(("dummy_b", "dummy_a", "0.1"), aliases_list) + + with self.assertNoLogs(logger="moabb.utils", level="WARN") as cm: + self.assertEqual(dummy_a(2, b=2), 4) + self.assertEqual( + dummy_a.__doc__, + # "Dummy function\n\nNotes\n-----\n" + # "``dummy_a`` was previously named ``dummy_b``. " + # "``dummy_b`` will be removed in version 0.1.", + "Dummy function\n\n Notes\n -----\n\n" + " .. note:: ``dummy_a`` was previously named ``dummy_b``. " + "``dummy_b`` will be removed in version 0.1.\n", + ) + + with self.assertLogs(logger="moabb.utils", level="WARN") as cm: + self.assertEqual(dummy_b(2, b=2), 4) # noqa: F821 + + self.assertEqual(1, len(cm.output)) + expected = ( + "dummy_b has been renamed to dummy_a. dummy_b will be removed in version 0.1." + ) + self.assertRegex(cm.output[0], expected) + # class name and type: + self.assertEqual(dummy_b.__name__, "dummy_b") # noqa: F821 if __name__ == "__main__": diff --git a/moabb/utils.py b/moabb/utils.py index addc0aab8..a412af612 100644 --- a/moabb/utils.py +++ b/moabb/utils.py @@ -1,14 +1,20 @@ """Util functions for moabb.""" +import inspect import logging import os import os.path as osp import random +import re +import sys import numpy as np from mne import get_config, set_config from mne import set_log_level as sll +log = logging.getLogger(__name__) + + def _set_random_seed(seed: int) -> None: """Set the seed for Python's built-in random module and numpy. 
@@ -147,3 +153,58 @@ def set_download_dir(path): print("The path given does not exist, creating it..") os.makedirs(path) set_config("MNE_DATA", path) + + +aliases_list = [] # list of tuples containing (old name, new name, expire version) + + +def update_docstring_list(doc, section, msg): + header = f"{section}[ ]*\n[ ]*[\-]+[ ]*\n" + if section not in doc: + doc = doc + f"\n\n {section}\n {'-' * len(section)}\n" + if re.search(f"[ ]*{header}", doc) is None: + raise ValueError( + f"Incorrect formatting of section {section!r} in docstring {doc!r}" + ) + doc = re.sub(f"([ ]*)({header})", f"\g<1>\g<2>\n\g<1>{msg}\n", doc) + return doc + + +def depreciated_alias(name, expire_version): + """Decorator that creates an alias for the decorated function or class, + marks that alias as depreciated, and adds the alias to ``aliases_list``. + Not working on methods.""" + + def factory(func): + warn_msg = ( + f"{name} has been renamed to {func.__name__}. " + f"{name} will be removed in version {expire_version}." + ) + note_msg = ( + f".. note:: ``{func.__name__}`` was previously named ``{name}``. " + f"``{name}`` will be removed in version {expire_version}." + ) + + namespace = sys._getframe(1).f_globals # Caller's globals. + if inspect.isclass(func): + + def __init__(self, *args, **kwargs): + log.warning(warn_msg) + func.__init__(self, *args, **kwargs) + + namespace[name] = type(name, (func,), dict(func.__dict__, __init__=__init__)) + elif inspect.isfunction(func): + + def depreciated_func(*args, **kwargs): + log.warning(warn_msg) + return func(*args, **kwargs) + + depreciated_func.__name__ = name + namespace[name] = depreciated_func + else: + raise ValueError("Can only decorate functions and classes") + func.__doc__ = update_docstring_list(func.__doc__ or "", "Notes", note_msg) + aliases_list.append((name, func.__name__, expire_version)) + return func + + return factory diff --git a/tutorials/plot_Getting_Started.py b/tutorials/plot_Getting_Started.py index f8522a9cb..e7e297120 100644 --- a/tutorials/plot_Getting_Started.py +++ b/tutorials/plot_Getting_Started.py @@ -30,7 +30,7 @@ # If you would like to specify the logging level when it is running, you can # use the standard python logging commands through the top-level moabb module import moabb -from moabb.datasets import BNCI2014001, utils +from moabb.datasets import BNCI2014_001, utils from moabb.evaluations import CrossSessionEvaluation from moabb.paradigms import LeftRightImagery from moabb.pipelines.features import LogVariance @@ -71,7 +71,6 @@ print(LeftRightImagery().datasets) - ########################################################################## # Or you can run a search through the available datasets: print(utils.dataset_search(paradigm="imagery", min_subjects=6)) @@ -80,7 +79,7 @@ # Or you can simply make your own list (which we do here due to computational # constraints) -dataset = BNCI2014001() +dataset = BNCI2014_001() dataset.subject_list = dataset.subject_list[:2] datasets = [dataset] diff --git a/tutorials/tutorial_1_simple_example_motor_imagery.py b/tutorials/tutorial_1_simple_example_motor_imagery.py index def5b2dc6..1e3ad1850 100644 --- a/tutorials/tutorial_1_simple_example_motor_imagery.py +++ b/tutorials/tutorial_1_simple_example_motor_imagery.py @@ -23,7 +23,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import BNCI2014001 +from moabb.datasets import BNCI2014_001 from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import LeftRightImagery @@ -46,7 +46,7 @@ # - 
importing the data from the files in whatever extension they might be # (like .mat, .gdf, etc.) and instantiate a Raw object from the MNE package -dataset = BNCI2014001() +dataset = BNCI2014_001() dataset.subject_list = [1, 2, 3] ############################################################################## @@ -69,7 +69,6 @@ run_name = "run_1" raw = sessions[subject][session_name][run_name] - ############################################################################## # Choosing a Paradigm # ------------------- @@ -91,7 +90,7 @@ ############################################################################## # We may check the list of all datasets available in MOABB for using with this -# paradigm (note that BNCI2014001 is in it) +# paradigm (note that BNCI2014_001 is in it) print(paradigm.datasets) diff --git a/tutorials/tutorial_2_using_mulitple_datasets.py b/tutorials/tutorial_2_using_mulitple_datasets.py index 377164a21..2cb5697c7 100644 --- a/tutorials/tutorial_2_using_mulitple_datasets.py +++ b/tutorials/tutorial_2_using_mulitple_datasets.py @@ -21,7 +21,7 @@ from sklearn.pipeline import make_pipeline import moabb -from moabb.datasets import BNCI2014001, Zhou2016 +from moabb.datasets import BNCI2014_001, Zhou2016 from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import LeftRightImagery @@ -30,7 +30,6 @@ mne.set_log_level("CRITICAL") warnings.filterwarnings("ignore") - ############################################################################## # Initializing Datasets # --------------------- @@ -39,12 +38,11 @@ # (with left-hand/right-hand classes) but were recorded with different number # of electrodes, different number of trials, etc. -datasets = [Zhou2016(), BNCI2014001()] +datasets = [Zhou2016(), BNCI2014_001()] subj = [1, 2, 3] for d in datasets: d.subject_list = subj - ############################################################################## # The following lines go exactly as in the previous example, where we end up # obtaining a pandas dataframe containing the results of the evaluation. We diff --git a/tutorials/tutorial_3_benchmarking_multiple_pipelines.py b/tutorials/tutorial_3_benchmarking_multiple_pipelines.py index 050403649..ee131b7b7 100644 --- a/tutorials/tutorial_3_benchmarking_multiple_pipelines.py +++ b/tutorials/tutorial_3_benchmarking_multiple_pipelines.py @@ -24,7 +24,7 @@ from sklearn.svm import SVC import moabb -from moabb.datasets import BNCI2014001, Zhou2016 +from moabb.datasets import BNCI2014_001, Zhou2016 from moabb.evaluations import WithinSessionEvaluation from moabb.paradigms import LeftRightImagery @@ -33,7 +33,6 @@ moabb.set_log_level("info") warnings.filterwarnings("ignore") - ############################################################################## # Creating Pipelines # ------------------ @@ -55,7 +54,7 @@ ############################################################################## # The following lines go exactly as in the previous tutorial, where we end up # obtaining a pandas dataframe containing the results of the evaluation. 
-datasets = [BNCI2014001(), Zhou2016()] +datasets = [BNCI2014_001(), Zhou2016()] subj = [1, 2, 3] for d in datasets: d.subject_list = subj diff --git a/tutorials/tutorial_5_build_a_custom_dataset.py b/tutorials/tutorial_5_build_a_custom_dataset.py index 7e6f8d9a1..23c8e1d33 100644 --- a/tutorials/tutorial_5_build_a_custom_dataset.py +++ b/tutorials/tutorial_5_build_a_custom_dataset.py @@ -11,8 +11,8 @@ from pyriemann.estimation import ERPCovariances from sklearn.pipeline import make_pipeline -from moabb.datasets import VirtualReality -from moabb.datasets.braininvaders import bi2014a +from moabb.datasets import Cattan2019_VR +from moabb.datasets.braininvaders import BI2014a from moabb.datasets.compound_dataset import CompoundDataset from moabb.datasets.utils import blocks_reps from moabb.evaluations import WithinSessionEvaluation @@ -35,6 +35,7 @@ pipelines = {} pipelines["MDM"] = make_pipeline(ERPCovariances(estimator="lwf"), MDM(metric="riemann")) + ############################################################################## # Creation a selection of subject # ------------------ @@ -53,7 +54,7 @@ class CustomDataset1(CompoundDataset): def __init__(self): - biVR = VirtualReality(virtual_reality=True, screen_display=True) + biVR = Cattan2019_VR(virtual_reality=True, screen_display=True) runs = blocks_reps([1, 3], [1, 2, 3, 4, 5]) subjects_list = [ (biVR, 1, "VR", runs), @@ -71,7 +72,7 @@ def __init__(self): class CustomDataset2(CompoundDataset): def __init__(self): - bi2014 = bi2014a() + bi2014 = BI2014a() subjects_list = [ (bi2014, 4, None, None), (bi2014, 7, None, None), From 05135c5974c69a7d2efc47f81f6db7355a28903c Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Sat, 19 Aug 2023 10:32:43 +0200 Subject: [PATCH 35/64] Add links to classes in Datasets section of doc (#462) * Add links to classes in *Datasets* section of doc (first test) * Add links to classes in Datasets section of doc (all) --- docs/source/dataset_summary.rst | 99 ++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst index f8602b256..326f84a7f 100644 --- a/docs/source/dataset_summary.rst +++ b/docs/source/dataset_summary.rst @@ -1,5 +1,10 @@ .. _data_summary: +.. automodule:: moabb.datasets + +.. 
currentmodule:: moabb.datasets + + Data Summary ====================== @@ -20,22 +25,22 @@ Motor Imagery :header: Dataset, #Subj, #Chan, #Classes, #Trials, Trial length, Freq, #Session, #Runs, Total_trials :class: sortable - AlexMI,8,16,3,20,3s,512Hz,1,1,480 - BNCI2014_001,9,22,4,144,4s,250Hz,2,6,62208 - BNCI2014_002,14,15,2,80,5s,512Hz,1,8,17920 - BNCI2014_004,9,3,2,360,4.5s,250Hz,5,1,32400 - BNCI2015_001,12,13,2,200,5s,512Hz,3,1,14400 - BNCI2015_004,9,30,5,80,7s,256Hz,2,1,7200 - Cho2017,52,64,2,100,3s,512Hz,1,1,9800 - Lee2019_MI,54,62,2,100,4s,1000Hz,2,1,11000 - GrosseWentrup2009,10,128,2,150,7s,500Hz,1,1,3000 - Schirrmeister2017,14,128,4,120,4s,500Hz,1,2,13440 - Ofner2017,15,61,7,60,3s,512Hz,1,10,63000 - PhysionetMI,109,64,4,23,3s,160Hz,1,1,69760 - Shin2017A,29,30,2,30,10s,200Hz,3,1,5220 - Shin2017B,29,30,2,30,10s,200Hz,3,1,5220 - Weibo2014,10,60,7,80,4s,200Hz,1,1,5600 - Zhou2016,4,14,3,160,5s,250Hz,3,2,11496 + :class:`AlexMI`,8,16,3,20,3s,512Hz,1,1,480 + :class:`BNCI2014_001`,9,22,4,144,4s,250Hz,2,6,62208 + :class:`BNCI2014_002`,14,15,2,80,5s,512Hz,1,8,17920 + :class:`BNCI2014_004`,9,3,2,360,4.5s,250Hz,5,1,32400 + :class:`BNCI2015_001`,12,13,2,200,5s,512Hz,3,1,14400 + :class:`BNCI2015_004`,9,30,5,80,7s,256Hz,2,1,7200 + :class:`Cho2017`,52,64,2,100,3s,512Hz,1,1,9800 + :class:`Lee2019_MI`,54,62,2,100,4s,1000Hz,2,1,11000 + :class:`GrosseWentrup2009`,10,128,2,150,7s,500Hz,1,1,3000 + :class:`Schirrmeister2017`,14,128,4,120,4s,500Hz,1,2,13440 + :class:`Ofner2017`,15,61,7,60,3s,512Hz,1,10,63000 + :class:`PhysionetMI`,109,64,4,23,3s,160Hz,1,1,69760 + :class:`Shin2017A`,29,30,2,30,10s,200Hz,3,1,5220 + :class:`Shin2017B`,29,30,2,30,10s,200Hz,3,1,5220 + :class:`Weibo2014`,10,60,7,80,4s,200Hz,1,1,5600 + :class:`Zhou2016`,4,14,3,160,5s,250Hz,3,2,11496 P300/ERP ====================== @@ -44,21 +49,21 @@ P300/ERP :header: Dataset, #Subj, #Chan, #Trials / class, Trials length, Sampling rate, #Sessions :class: sortable - BNCI2014_008, 8, 8, 3500 NT / 700 T, 1s, 256Hz, 1 - BNCI2014_009, 10, 16, 1440 NT / 288 T, 0.8s, 256Hz, 3 - BNCI2015_003, 10, 8, 1500 NT / 300 T, 0.8s, 256Hz, 1 - BI2012, 25, 16, 640 NT / 128 T, 1s, 128Hz, 2 - BI2013a, 24, 16, 3200 NT / 640 T, 1s, 512Hz, 8 for subjects 1-7 else 1 - BI2014a, 64, 16, 990 NT / 198 T, 1s, 512Hz, up to 3 - BI2014b, 38, 32, 200 NT / 40 T, 1s, 512Hz, 3 - BI2015a, 43, 32, 4131 NT / 825 T, 1s, 512Hz, 3 - BI2015b, 44, 32, 2160 NT / 480 T, 1s, 512Hz, 1 - Cattan2019_VR, 21, 16, 600 NT / 120 T, 1s, 512Hz, 2 - Huebner2017, 13, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 - Huebner2018, 12, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 - Sosulski2019, 13, 31, 75 NT / 15 T, 1.2s, 1000Hz, 3 - EPFLP300, 8, 32, 2753 NT / 551 T, 1s, 2048Hz, 4 - Lee2019_ERP, 54, 62, 6900 NT / 1380 T, 1s, 1000Hz, 2 + :class:`BNCI2014_008`, 8, 8, 3500 NT / 700 T, 1s, 256Hz, 1 + :class:`BNCI2014_009`, 10, 16, 1440 NT / 288 T, 0.8s, 256Hz, 3 + :class:`BNCI2015_003`, 10, 8, 1500 NT / 300 T, 0.8s, 256Hz, 1 + :class:`BI2012`, 25, 16, 640 NT / 128 T, 1s, 128Hz, 2 + :class:`BI2013a`, 24, 16, 3200 NT / 640 T, 1s, 512Hz, 8 for subjects 1-7 else 1 + :class:`BI2014a`, 64, 16, 990 NT / 198 T, 1s, 512Hz, up to 3 + :class:`BI2014b`, 38, 32, 200 NT / 40 T, 1s, 512Hz, 3 + :class:`BI2015a`, 43, 32, 4131 NT / 825 T, 1s, 512Hz, 3 + :class:`BI2015b`, 44, 32, 2160 NT / 480 T, 1s, 512Hz, 1 + :class:`Cattan2019_VR`, 21, 16, 600 NT / 120 T, 1s, 512Hz, 2 + :class:`Huebner2017`, 13, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 + :class:`Huebner2018`, 12, 31, 364 NT / 112 T, 0.9s, 1000Hz, 3 + :class:`Sosulski2019`, 13, 31, 75 NT / 15 T, 1.2s, 
1000Hz, 3 + :class:`EPFLP300`, 8, 32, 2753 NT / 551 T, 1s, 2048Hz, 4 + :class:`Lee2019_ERP`, 54, 62, 6900 NT / 1380 T, 1s, 1000Hz, 2 SSVEP @@ -69,13 +74,13 @@ SSVEP :header: Dataset, #Subj, #Chan, #Classes, #Trials / class, Trials length, Sampling rate, #Sessions :class: sortable - Lee2019_SSVEP,54,16,4,25,1s,1000Hz,1 - Kalunga2016,12,8,4,16,2s,256Hz,1 - MAMEM1,10,256,5,12-15,3s,250Hz,1 - MAMEM2,10,256,5,20-30,3s,250Hz,1 - MAMEM3,10,14,4,20-30,3s,128Hz,1 - Nakanishi2015,9,8,12,15,4.15s,256Hz,1 - Wang2016,34,62,40,6,5s,250Hz,1 + :class:`Lee2019_SSVEP`,54,16,4,25,1s,1000Hz,1 + :class:`Kalunga2016`,12,8,4,16,2s,256Hz,1 + :class:`MAMEM1`,10,256,5,12-15,3s,250Hz,1 + :class:`MAMEM2`,10,256,5,20-30,3s,250Hz,1 + :class:`MAMEM3`,10,14,4,20-30,3s,128Hz,1 + :class:`Nakanishi2015`,9,8,12,15,4.15s,256Hz,1 + :class:`Wang2016`,34,62,40,6,5s,250Hz,1 Resting States @@ -89,12 +94,16 @@ is a resting state experiment. :header: Dataset, #Subj, #Chan, #Classes, #Blocks / class, Trials length, Sampling rate, #Sessions :class: sortable - Cattan2019_PHMD,12,16,2,10,60s,512Hz,1 + :class:`Cattan2019_PHMD`,12,16,2,10,60s,512Hz,1 Compound Datasets ====================== +.. automodule:: moabb.datasets.compound_dataset + +.. currentmodule:: moabb.datasets.compound_dataset + Compound Datasets are datasets compounded with subjects from other datasets. It is useful for merging different datasets (including other Compound Datasets), select a sample of subject inside a dataset (e.g. subject with high/low performance). @@ -103,12 +112,12 @@ select a sample of subject inside a dataset (e.g. subject with high/low performa :header: Dataset, #Subj, #Original datasets :class: sortable - BI2014a_Il,17,BI2014a - BI2014b_Il,11,BI2014b - BI2015a_Il,2,BI2015a - BI2015b_Il,25,BI2015b - Cattan2019_VR_Il,4,Cattan2019_VR - BI_Il,59,BI2014a_Il BI2014b_Il BI2015a_Il BI2015b_Il Cattan2019_VR_Il + :class:`BI2014a_Il`,17,BI2014a + :class:`BI2014b_Il`,11,BI2014b + :class:`BI2015a_Il`,2,BI2015a + :class:`BI2015b_Il`,25,BI2015b + :class:`Cattan2019_VR_Il`,4,Cattan2019_VR + :class:`BI_Il`,59,:class:`BI2014a_Il` :class:`BI2014b_Il` :class:`BI2015a_Il` :class:`BI2015b_Il` :class:`Cattan2019_VR_Il` Submit a new dataset From 90e358f84e956b481188e31c85c20e5293af7619 Mon Sep 17 00:00:00 2001 From: Bru Date: Mon, 21 Aug 2023 14:15:13 +0200 Subject: [PATCH 36/64] Delaying the depreciation of old dataset names in the dataset list (#464) * Updating the whats_new.rst and fixing the dataset list * fixing tests and fixing datalist order * whats_new file * Commeting one test * Returning the test * Fixing tests * Update moabb/tests/datasets.py Co-authored-by: PierreGtch <25532709+PierreGtch@users.noreply.github.com> * Fixing tests --------- Co-authored-by: PierreGtch <25532709+PierreGtch@users.noreply.github.com> --- docs/source/whats_new.rst | 2 +- moabb/datasets/__init__.py | 43 +++++++++++++++++++------------------- moabb/tests/datasets.py | 13 +++++++----- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index fb2255f39..a29741365 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -66,7 +66,7 @@ Bugs - Fix :func:`moabb.paradigms.FakeImageryParadigm`, :func:`moabb.paradigms.FakeP300Paradigm` and :func:`moabb.paradigms.FakeSSVEPParadigm` ``is_valid`` methods to only accept the correct datasets (PR :gh:`408` by `Pierre Guetschel`_) - Fix ``dataset_list`` construction, which could be empty due to bad import order (PR :gh:`449` by `Thomas Moreau`_). 
- Fixing dataset downloader from servers with non-http (PR :gh:`433` by `Sara Sedlar`_) - +- Fix ``dataset_list`` to include deprecated datasets (PR :gh:`464` by `Bruno Aristimunha`_) API changes ~~~~~~~~~~~ diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index ff35d8284..38bc1cd14 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -11,6 +11,16 @@ # flake8: noqa from .alex_mi import AlexMI from .bbci_eeg_fnirs import Shin2017A, Shin2017B + +# Depreciated datasets (will be removed in the future): +from .bnci import BNCI2014001 # noqa: F401 +from .bnci import BNCI2014002 # noqa: F401 +from .bnci import BNCI2014004 # noqa: F401 +from .bnci import BNCI2014008 # noqa: F401 +from .bnci import BNCI2014009 # noqa: F401 +from .bnci import BNCI2015001 # noqa: F401 +from .bnci import BNCI2015003 # noqa: F401 +from .bnci import BNCI2015004 # noqa: F401 from .bnci import ( BNCI2014_001, BNCI2014_002, @@ -21,6 +31,13 @@ BNCI2015_003, BNCI2015_004, ) +from .braininvaders import VirtualReality # noqa: F401 +from .braininvaders import bi2012 # noqa: F401 +from .braininvaders import bi2013a # noqa: F401 +from .braininvaders import bi2014a # noqa: F401 +from .braininvaders import bi2014b # noqa: F401 +from .braininvaders import bi2015a # noqa: F401 +from .braininvaders import bi2015b # noqa: F401 from .braininvaders import ( BI2012, BI2013a, @@ -35,12 +52,15 @@ from .gigadb import Cho2017 from .huebner_llp import Huebner2017, Huebner2018 from .Lee2019 import Lee2019_ERP, Lee2019_MI, Lee2019_SSVEP +from .mpi_mi import MunichMI # noqa: F401 from .mpi_mi import GrosseWentrup2009 from .neiry import DemonsP300 +from .phmd_ml import HeadMountedDisplay # noqa: F401 from .phmd_ml import Cattan2019_PHMD from .physionet_mi import PhysionetMI from .schirrmeister2017 import Schirrmeister2017 from .sosulski2019 import Sosulski2019 +from .ssvep_exo import SSVEPExo # noqa: F401 from .ssvep_exo import Kalunga2016 from .ssvep_mamem import MAMEM1, MAMEM2, MAMEM3 from .ssvep_nakanishi import Nakanishi2015 @@ -51,27 +71,6 @@ from .Zhou2016 import Zhou2016 -# Call this last in order to make sure the dataset list contains all +# Call this last in order to make sure the dataset list is populated with # the datasets imported in this file. 
_init_dataset_list() -del _init_dataset_list - -# Depreciated datasets (not added to dataset_list): -from .bnci import BNCI2014001 # noqa: F401 -from .bnci import BNCI2014002 # noqa: F401 -from .bnci import BNCI2014004 # noqa: F401 -from .bnci import BNCI2014008 # noqa: F401 -from .bnci import BNCI2014009 # noqa: F401 -from .bnci import BNCI2015001 # noqa: F401 -from .bnci import BNCI2015003 # noqa: F401 -from .bnci import BNCI2015004 # noqa: F401 -from .braininvaders import VirtualReality # noqa: F401 -from .braininvaders import bi2012 # noqa: F401 -from .braininvaders import bi2013a # noqa: F401 -from .braininvaders import bi2014a # noqa: F401 -from .braininvaders import bi2014b # noqa: F401 -from .braininvaders import bi2015a # noqa: F401 -from .braininvaders import bi2015b # noqa: F401 -from .mpi_mi import MunichMI # noqa: F401 -from .phmd_ml import HeadMountedDisplay # noqa: F401 -from .ssvep_exo import SSVEPExo # noqa: F401 diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py index ae4457945..f53ffb6ee 100644 --- a/moabb/tests/datasets.py +++ b/moabb/tests/datasets.py @@ -183,6 +183,8 @@ def test_dataset_accept(self): def test_datasets_init(self): codes = [] logger = logging.getLogger("moabb.datasets.base") + deprecated_list, _, _ = zip(*aliases_list) + for ds in dataset_list: kwargs = {} if inspect.signature(ds).parameters.get("accept"): @@ -192,9 +194,11 @@ def test_datasets_init(self): # Trick needed because assertNoLogs only inrtoduced in python 3.10: logger.warning(f"Testing {ds.__name__}") obj = ds(**kwargs) - self.assertEqual(len(cm.output), 1) + if type(obj).__name__ not in deprecated_list: + self.assertEqual(len(cm.output), 1) self.assertIsNotNone(obj) - codes.append(obj.code) + if type(obj).__name__ not in deprecated_list: + codes.append(obj.code) # Check that all codes are unique: self.assertEqual(len(codes), len(set(codes))) @@ -219,17 +223,16 @@ def test_dataset_list(self): if aliases_list: depreciated_list, _, _ = zip(*aliases_list) else: - depreciated_list = [] + pass all_datasets = [ c for c in db.__dict__.values() if ( inspect.isclass(c) and issubclass(c, BaseDataset) - and c.__name__ not in depreciated_list + # and c.__name__ not in depreciated_list ) ] - assert len(dataset_list) == len(all_datasets) assert set(dataset_list) == set(all_datasets) From cf79df6cd603b7a56694cc911e9e814975abc0d4 Mon Sep 17 00:00:00 2001 From: Bru Date: Mon, 21 Aug 2023 14:15:43 +0200 Subject: [PATCH 37/64] Adding Sara and Pierre in the main webpage (#465) * Adding Sara and Pierre in the main webpage * Update README.md Thanks @bruAristimunha ! :blush: I've changed it a bit to take into account contributions. --------- Co-authored-by: Sara Sedlar --- docs/source/README.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/source/README.md b/docs/source/README.md index 8142dfad2..cd91d58c1 100644 --- a/docs/source/README.md +++ b/docs/source/README.md @@ -85,14 +85,18 @@ The project is currently maintained by: Sylvain Chevallier Bruno Aristimunha Igor Carrara + Pierre Guetschel + Sara Sedlar - Sylvain Chevallier - Bruno Aristimunha - Igor Carrara - + Sylvain Chevallier + Bruno Aristimunha + Igor Carrara + Pierre Guetschel + Sara Sedlar + @@ -123,7 +127,7 @@ The MOABB is a community project, and we are always thankful for all the contrib