From a71aaa8b0b62bd7fe8831533be65ff80441f6cf9 Mon Sep 17 00:00:00 2001 From: Samuel Boehm Date: Wed, 11 Dec 2024 19:31:58 +0100 Subject: [PATCH 1/7] handle pagination - needed for Stieger2021 ds --- moabb/datasets/download.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/moabb/datasets/download.py b/moabb/datasets/download.py index 4684a89fe..48507203c 100644 --- a/moabb/datasets/download.py +++ b/moabb/datasets/download.py @@ -206,30 +206,39 @@ def fs_issue_request(method, url, headers, data=None, binary=False): def fs_get_file_list(article_id, version=None): """List all the files associated with a given article. - Parameters ---------- article_id : str or int Figshare article ID version : str or id, default is None Figshare article version. If None, selects the most recent version. - Returns ------- response : dict HTTP request response as a python dict """ fsurl = "https://api.figshare.com/v2" - if version is None: - url = fsurl + "/articles/{}/files".format(article_id) - headers = {"Content-Type": "application/json"} - response = fs_issue_request("GET", url, headers=headers) - return response - else: - url = fsurl + "/articles/{}/versions/{}".format(article_id, version) - headers = {"Content-Type": "application/json"} - request = fs_issue_request("GET", url, headers=headers) - return request["files"] + all_files = [] + page = 1 + + while True: + if version is None: + url = f"{fsurl}/articles/{article_id}/files?page={page}&page_size=100" + headers = {"Content-Type": "application/json"} + response = fs_issue_request("GET", url, headers=headers) + + if not response: # If response is empty, we've got all files + break + + all_files.extend(response) + page += 1 + else: + url = f"{fsurl}/articles/{article_id}/versions/{version}" + headers = {"Content-Type": "application/json"} + request = fs_issue_request("GET", url, headers=headers) + return request["files"] + + return all_files def fs_get_file_hash(filelist): From b05d58ab30448e8a11861beecfd4608971d5002d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Dec 2024 18:40:16 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks --- moabb/datasets/download.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/moabb/datasets/download.py b/moabb/datasets/download.py index 48507203c..d2fec1a11 100644 --- a/moabb/datasets/download.py +++ b/moabb/datasets/download.py @@ -220,16 +220,16 @@ def fs_get_file_list(article_id, version=None): fsurl = "https://api.figshare.com/v2" all_files = [] page = 1 - + while True: if version is None: url = f"{fsurl}/articles/{article_id}/files?page={page}&page_size=100" headers = {"Content-Type": "application/json"} response = fs_issue_request("GET", url, headers=headers) - + if not response: # If response is empty, we've got all files break - + all_files.extend(response) page += 1 else: @@ -237,7 +237,7 @@ def fs_get_file_list(article_id, version=None): headers = {"Content-Type": "application/json"} request = fs_issue_request("GET", url, headers=headers) return request["files"] - + return all_files From 705ebeffd5743c9cdfd122fca79399db28d114e0 Mon Sep 17 00:00:00 2001 From: Samuel Boehm Date: Sun, 22 Dec 2024 14:10:14 +0100 Subject: [PATCH 3/7] add beetl datasets --- moabb/datasets/__init__.py | 1 + moabb/datasets/beetl.py | 272 +++++++++++++++++++++++++++++++++++++ 2 files changed, 273 insertions(+) create mode 100644 moabb/datasets/beetl.py 
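The pagination handling introduced in PATCH 1/2 follows Figshare's page-based listing: request /articles/{id}/files with an increasing page number and a page_size of 100 until an empty page comes back. A minimal standalone sketch of the same pattern, using requests directly instead of MOABB's fs_issue_request helper, could look roughly like this; the endpoint and the empty-page stop condition mirror the patch, while the choice of 100 as the page size is an assumption about a reasonable batch size rather than a documented Figshare limit.

# Rough sketch of the pagination pattern from PATCH 1/2, outside MOABB's helpers.
import requests

def list_article_files(article_id: int, page_size: int = 100) -> list[dict]:
    """Collect every file record of a Figshare article across pages."""
    base = "https://api.figshare.com/v2"
    files: list[dict] = []
    page = 1
    while True:
        resp = requests.get(
            f"{base}/articles/{article_id}/files",
            params={"page": page, "page_size": page_size},
            headers={"Content-Type": "application/json"},
            timeout=30,
        )
        resp.raise_for_status()
        batch = resp.json()
        if not batch:  # an empty page means all files have been listed
            break
        files.extend(batch)
        page += 1
    return files

# e.g. files = list_article_files(14839650)  # BEETL leaderboard article
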
diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index 5b3a41fae..a86de8455 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -82,6 +82,7 @@ from .utils import _init_dataset_list from .Weibo2014 import Weibo2014 from .Zhou2016 import Zhou2016 +from .beetl import beetlA, beetlB # Call this last in order to make sure the dataset list is populated with diff --git a/moabb/datasets/beetl.py b/moabb/datasets/beetl.py new file mode 100644 index 000000000..1b3e57ca5 --- /dev/null +++ b/moabb/datasets/beetl.py @@ -0,0 +1,272 @@ +import logging +import os +from pathlib import Path +import mne +import numpy as np +import pooch +from .base import BaseDataset +from .download import get_dataset_path +import moabb.datasets.download as dl + + +LOGGER = logging.getLogger(__name__) +BASE_URL = "https://ndownloader.figshare.com/files/" + +LEADERBOARD_ARTICLE_ID = 14839650 +FINAL_EVALUATION_ARTICLE_ID = 16586213 +FINAL_LABEL_TXT_ARTICLE_ID = 21602622 + +class beetlA(BaseDataset): + """Motor Imagery dataset from BEETL Competition - Dataset A. + + Dataset A contains data from subjects with 500 Hz sampling rate and 63 EEG channels. + In the leaderboard phase, this includes subjects 1-2, while in the final phase it includes + subjects 1-3. + + Motor imagery tasks include: + - Rest (label 0) + - Left hand (label 1) + - Right hand (label 2) + - Feet (label 3) + + References + ---------- + .. [1] Original dataset: https://www.kaggle.com/competitions/beetl + """ + + def __init__(self, phase="final"): + """Initialize BEETL Dataset A. + + Parameters + ---------- + phase : str + Either "leaderboard" (subjects 1-2) or "final" (subjects 1-3) + """ + if phase not in ["leaderboard", "final"]: + raise ValueError("Phase must be either 'leaderboard' or 'final'") + + self.phase = phase + subjects = list(range(1, 3)) if phase == "leaderboard" else list(range(1, 4)) + + # Channel setup + self.ch_names = ['Fp1', 'Fz', 'F3', 'F7', 'FT9', 'FC5', 'FC1', 'C3', 'T7', 'TP9', + 'CP5', 'CP1', 'Pz', 'P3', 'P7', 'O1', 'Oz', 'O2', 'P4', 'P8', + 'TP10', 'CP6', 'CP2', 'C4', 'T8', 'FT10', 'FC6', 'FC2', 'F4', + 'F8', 'Fp2', 'AF7', 'AF3', 'AFz', 'F1', 'F5', 'FT7', 'FC3', + 'FCz', 'C1', 'C5', 'TP7', 'CP3', 'P1', 'P5', 'PO7', 'PO3', + 'POz', 'PO4', 'PO8', 'P6', 'P2', 'CPz', 'CP4', 'TP8', 'C6', + 'C2', 'FC4', 'FT8', 'F6', 'F2', 'AF4', 'AF8'] + + self.sfreq = 500 + + + + super().__init__( + subjects=subjects, + sessions_per_subject=1, # Data is concatenated into one session + events=dict( + rest=0, + left_hand=1, + right_hand=2, + feet=3 + ), + code="beetl", + interval=[0, 4], # 4s trial window + paradigm="imagery", + ) + + def _get_single_subject_data(self, subject): + """Return data for a single subject.""" + file_paths = self.data_path(subject) + + # Create MNE info + info = mne.create_info(ch_names=self.ch_names, sfreq=self.sfreq, ch_types=['eeg'] * len(self.ch_names)) + + + phase_str = "leaderboardMI" if self.phase == "leaderboard" else "finalMI" + subject_dir = Path(file_paths[0]) / phase_str / phase_str / f'S{subject}' + + data_list = [] + labels_list = [] + + # Load training data + for race in range(1, 6): + data_file = subject_dir / 'training' / f'race{race}_padsData.npy' + label_file = subject_dir / 'training' / f'race{race}_padsLabel.npy' + if data_file.exists() and label_file.exists(): + data_list.append(np.load(data_file, allow_pickle=True)) + labels_list.append(np.load(label_file, allow_pickle=True)) + + data = np.concatenate(data_list) + labels = np.concatenate(labels_list) + + # Create 
events array + events = np.column_stack(( + np.arange(0, len(labels) * data.shape[-1], data.shape[-1]), + np.zeros(len(labels), dtype=int), + labels + )) + + # Create Raw object + event_desc = {int(code): name for name, code in self.event_id.items()} + raw = mne.io.RawArray(np.hstack(data), info) + raw.set_annotations(mne.annotations_from_events( + events=events, + event_desc=event_desc, + sfreq=self.sfreq + )) + + return {"0": {"0": raw}} + + def data_path( + self, subject, path=None, force_update=False, update_path=None, verbose=None + ): + """Return path to the data files.""" + if subject not in self.subject_list: + raise ValueError(f"Subject {subject} not in {self.subject_list}") + + path = get_dataset_path("BEETL", path) + base_path = Path(os.path.join(path, f"MNE-{self.code:s}-data") +) + # Create the directory if it doesn't exist + base_path.mkdir(parents=True, exist_ok=True) + + # Download data if needed + for article_id in [LEADERBOARD_ARTICLE_ID, FINAL_EVALUATION_ARTICLE_ID]: + file_list = dl.fs_get_file_list(article_id) + hash_file_list = dl.fs_get_file_hash(file_list) + id_file_list = dl.fs_get_file_id(file_list) + + for file_name in id_file_list.keys(): + fpath = base_path / file_name + if not fpath.exists() or force_update: + pooch.retrieve( + url=BASE_URL + id_file_list[file_name], + known_hash=hash_file_list[id_file_list[file_name]], + fname=file_name, + path=base_path, + processor=pooch.Unzip(extract_dir=os.path.splitext(file_name)[0]), + downloader=pooch.HTTPDownloader(progressbar=True), + ) + + return [str(base_path)] + + +class beetlB(BaseDataset): + """Motor Imagery dataset from BEETL Competition - Dataset B. + + Dataset B contains data from subjects with 200 Hz sampling rate and 32 EEG channels. + In the leaderboard phase, this includes subjects 3-5, while in the final phase it includes + subjects 4-5. + + Motor imagery tasks include: + - Left hand (label 0) + - Right hand (label 1) + - Feet (label 2) + - Rest (label 3) + + References + ---------- + .. [1] Original dataset: https://www.kaggle.com/competitions/beetl + """ + + def __init__(self, phase="final"): + """Initialize BEETL Dataset B. 
+ + Parameters + ---------- + phase : str + Either "leaderboard" (subjects 3-5) or "final" (subjects 4-5) + """ + if phase not in ["leaderboard", "final"]: + raise ValueError("Phase must be either 'leaderboard' or 'final'") + + self.phase = phase + subjects = list(range(3, 6)) if phase == "leaderboard" else list(range(4, 6)) + + super().__init__( + subjects=subjects, + sessions_per_subject=1, # Data is concatenated into one session + events=dict( + left_hand=0, + right_hand=1, + feet=2, + rest=3 + ), + code="beetl", + interval=[0, 4], # 4s trial window + paradigm="imagery", + ) + + def _get_single_subject_data(self, subject): + """Return data for a single subject.""" + file_paths = self.data_path(subject) + + # Channel setup + ch_names = ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'FC5', 'FC1', 'FC2', 'FC6', + 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', + 'CPz', 'CP2', 'CP4', 'CP6', 'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', + 'P4', 'P6', 'P8'] + sfreq = 200 + + # Create MNE info + info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=['eeg'] * len(ch_names)) + + # Load data + phase_str = "leaderboardMI" if self.phase == "leaderboard" else "finalMI" + subject_dir = Path(file_paths[0]) / phase_str / phase_str / f'S{subject}' + + # Load training data + data = np.load(subject_dir / 'training' / f'training_s{subject}X.npy', allow_pickle=True) + labels = np.load(subject_dir / 'training' / f'training_s{subject}y.npy', allow_pickle=True) + + # Create events array + events = np.column_stack(( + np.arange(0, len(labels) * data.shape[-1], data.shape[-1]), + np.zeros(len(labels), dtype=int), + labels + )) + + # Create Raw object + event_desc = {int(code): name for name, code in self.event_id.items()} + raw = mne.io.RawArray(np.hstack(data), info) + raw.set_annotations(mne.annotations_from_events( + events=events, + event_desc=event_desc, + sfreq=sfreq + )) + + return {"0": {"0": raw}} + + def data_path( + self, subject, path=None, force_update=False, update_path=None, verbose=None + ): + """Return path to the data files.""" + if subject not in self.subject_list: + raise ValueError(f"Subject {subject} not in {self.subject_list}") + + path = get_dataset_path("BEETL", path) + base_path = Path(path) + + # Create the directory if it doesn't exist + base_path.mkdir(parents=True, exist_ok=True) + + # Download data if needed + for article_id in [LEADERBOARD_ARTICLE_ID, FINAL_EVALUATION_ARTICLE_ID]: + file_list = dl.fs_get_file_list(article_id) + hash_file_list = dl.fs_get_file_hash(file_list) + id_file_list = dl.fs_get_file_id(file_list) + + for file_name in id_file_list.keys(): + fpath = base_path / file_name + if not fpath.exists() or force_update: + pooch.retrieve( + url=BASE_URL + id_file_list[file_name], + known_hash=hash_file_list[id_file_list[file_name]], + fname=file_name, + path=base_path, + processor=pooch.Unzip(extract_dir=os.path.splitext(file_name)[0]), + downloader=pooch.HTTPDownloader(progressbar=True), + ) + + return [str(base_path)] From d3429292563b256018d5e4e06d818455cf6fe000 Mon Sep 17 00:00:00 2001 From: Samuel Boehm Date: Sun, 22 Dec 2024 23:57:54 +0100 Subject: [PATCH 4/7] beetl dataset final eval does now contain labels --- moabb/datasets/{beetl.py => BEETL.py} | 126 +++++++++++++++++++++----- moabb/datasets/__init__.py | 2 +- 2 files changed, 105 insertions(+), 23 deletions(-) rename moabb/datasets/{beetl.py => BEETL.py} (66%) diff --git a/moabb/datasets/beetl.py b/moabb/datasets/BEETL.py similarity index 66% rename from moabb/datasets/beetl.py rename to 
moabb/datasets/BEETL.py index 1b3e57ca5..11daee1d2 100644 --- a/moabb/datasets/beetl.py +++ b/moabb/datasets/BEETL.py @@ -16,7 +16,7 @@ FINAL_EVALUATION_ARTICLE_ID = 16586213 FINAL_LABEL_TXT_ARTICLE_ID = 21602622 -class beetlA(BaseDataset): +class BEETLA(BaseDataset): """Motor Imagery dataset from BEETL Competition - Dataset A. Dataset A contains data from subjects with 500 Hz sampling rate and 63 EEG channels. @@ -59,8 +59,6 @@ def __init__(self, phase="final"): self.sfreq = 500 - - super().__init__( subjects=subjects, sessions_per_subject=1, # Data is concatenated into one session @@ -82,7 +80,6 @@ def _get_single_subject_data(self, subject): # Create MNE info info = mne.create_info(ch_names=self.ch_names, sfreq=self.sfreq, ch_types=['eeg'] * len(self.ch_names)) - phase_str = "leaderboardMI" if self.phase == "leaderboard" else "finalMI" subject_dir = Path(file_paths[0]) / phase_str / phase_str / f'S{subject}' @@ -115,8 +112,36 @@ def _get_single_subject_data(self, subject): event_desc=event_desc, sfreq=self.sfreq )) + + + # Load test data + test_data_list = [] + for race in range(6, 16): + data_file = subject_dir / 'testing' / f'race{race}_padsData.npy' + if data_file.exists(): + test_data_list.append(np.load(data_file, allow_pickle=True)) + + test_data = np.concatenate(test_data_list) - return {"0": {"0": raw}} + # load labels from .txt + test_labels = np.loadtxt(Path(file_paths[0]) / 'final_MI_label.txt', dtype=int) + subject_labels = test_labels[(subject-1)*test_data.shape[0]:(subject)*test_data.shape[0]] + + test_events = np.column_stack(( + np.arange(0, len(subject_labels) * test_data.shape[-1], test_data.shape[-1]), + np.zeros(len(subject_labels), dtype=int), + subject_labels + )) + + # Create Raw object + test_raw = mne.io.RawArray(np.hstack(test_data), info) + test_raw.set_annotations(mne.annotations_from_events( + events=test_events, + event_desc=event_desc, + sfreq=self.sfreq + )) + + return {"0": {"0train": raw, "1test": test_raw}} def data_path( self, subject, path=None, force_update=False, update_path=None, verbose=None @@ -148,11 +173,29 @@ def data_path( processor=pooch.Unzip(extract_dir=os.path.splitext(file_name)[0]), downloader=pooch.HTTPDownloader(progressbar=True), ) + # Download labels for final phase + if self.phase == "final": + file_list = dl.fs_get_file_list(FINAL_LABEL_TXT_ARTICLE_ID) + hash_file_list = dl.fs_get_file_hash(file_list) + id_file_list = dl.fs_get_file_id(file_list) + + for file_name in id_file_list.keys(): + fpath = base_path / file_name + if (not fpath.exists() or force_update) and file_name == "final_MI_label.txt": + fpath = base_path / file_name + if not fpath.exists() or force_update: + pooch.retrieve( + url=BASE_URL + id_file_list[file_name], + known_hash=hash_file_list[id_file_list[file_name]], + fname=file_name, + path=base_path, + downloader=pooch.HTTPDownloader(progressbar=True), + ) return [str(base_path)] -class beetlB(BaseDataset): +class BEETLB(BaseDataset): """Motor Imagery dataset from BEETL Competition - Dataset B. Dataset B contains data from subjects with 200 Hz sampling rate and 32 EEG channels. 
@@ -198,45 +241,65 @@ def __init__(self, phase="final"): paradigm="imagery", ) + self.ch_names = ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'FC5', 'FC1', 'FC2', 'FC6', + 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', + 'CPz', 'CP2', 'CP4', 'CP6', 'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', + 'P4', 'P6', 'P8'] + self.sfreq = 200 + def _get_single_subject_data(self, subject): """Return data for a single subject.""" file_paths = self.data_path(subject) - # Channel setup - ch_names = ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'FC5', 'FC1', 'FC2', 'FC6', - 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', - 'CPz', 'CP2', 'CP4', 'CP6', 'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', - 'P4', 'P6', 'P8'] - sfreq = 200 # Create MNE info - info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=['eeg'] * len(ch_names)) + info = mne.create_info(ch_names=self.ch_names, sfreq=self.sfreq, ch_types=['eeg'] * len(self.ch_names)) # Load data phase_str = "leaderboardMI" if self.phase == "leaderboard" else "finalMI" subject_dir = Path(file_paths[0]) / phase_str / phase_str / f'S{subject}' # Load training data - data = np.load(subject_dir / 'training' / f'training_s{subject}X.npy', allow_pickle=True) - labels = np.load(subject_dir / 'training' / f'training_s{subject}y.npy', allow_pickle=True) + train_data = np.load(subject_dir / 'training' / f'training_s{subject}X.npy', allow_pickle=True) + train_labels = np.load(subject_dir / 'training' / f'training_s{subject}y.npy', allow_pickle=True) # Create events array events = np.column_stack(( - np.arange(0, len(labels) * data.shape[-1], data.shape[-1]), - np.zeros(len(labels), dtype=int), - labels + np.arange(0, len(train_labels) * train_data.shape[-1], train_data.shape[-1]), + np.zeros(len(train_labels), dtype=int), + train_labels )) # Create Raw object event_desc = {int(code): name for name, code in self.event_id.items()} - raw = mne.io.RawArray(np.hstack(data), info) + raw = mne.io.RawArray(np.hstack(train_data), info) raw.set_annotations(mne.annotations_from_events( events=events, event_desc=event_desc, - sfreq=sfreq + sfreq=self.sfreq )) - return {"0": {"0": raw}} + # Load test data + test_data = np.load(subject_dir / 'testing' / f'testing_s{subject}X.npy', allow_pickle=True) + # load labels from .txt + test_labels = np.loadtxt(Path(file_paths[0]) / 'final_MI_label.txt', dtype=int) + subject_labels = test_labels[(subject-1)*test_data.shape[0]:(subject)*test_data.shape[0]] + + test_events = np.column_stack(( + np.arange(0, len(subject_labels) * test_data.shape[-1], test_data.shape[-1]), + np.zeros(len(subject_labels), dtype=int), + subject_labels + )) + + # Create Raw object + test_raw = mne.io.RawArray(np.hstack(test_data), info) + test_raw.set_annotations(mne.annotations_from_events( + events=test_events, + event_desc=event_desc, + sfreq=self.sfreq + )) + + return {"0": {"0train": raw, "1test": test_raw}} def data_path( self, subject, path=None, force_update=False, update_path=None, verbose=None @@ -246,7 +309,7 @@ def data_path( raise ValueError(f"Subject {subject} not in {self.subject_list}") path = get_dataset_path("BEETL", path) - base_path = Path(path) + base_path = Path(os.path.join(path, f"MNE-{self.code:s}-data")) # Create the directory if it doesn't exist base_path.mkdir(parents=True, exist_ok=True) @@ -268,5 +331,24 @@ def data_path( processor=pooch.Unzip(extract_dir=os.path.splitext(file_name)[0]), downloader=pooch.HTTPDownloader(progressbar=True), ) + + # Download labels for final phase + if self.phase == "final": + file_list = 
dl.fs_get_file_list(FINAL_LABEL_TXT_ARTICLE_ID) + hash_file_list = dl.fs_get_file_hash(file_list) + id_file_list = dl.fs_get_file_id(file_list) + + for file_name in id_file_list.keys(): + fpath = base_path / file_name + if (not fpath.exists() or force_update) and file_name == "final_MI_label.txt": + fpath = base_path / file_name + if not fpath.exists() or force_update: + pooch.retrieve( + url=BASE_URL + id_file_list[file_name], + known_hash=hash_file_list[id_file_list[file_name]], + fname=file_name, + path=base_path, + downloader=pooch.HTTPDownloader(progressbar=True), + ) return [str(base_path)] diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index a86de8455..6588798b7 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -82,7 +82,7 @@ from .utils import _init_dataset_list from .Weibo2014 import Weibo2014 from .Zhou2016 import Zhou2016 -from .beetl import beetlA, beetlB +from .BEETL import BEETLA, BEETLB # Call this last in order to make sure the dataset list is populated with From 98d6d7f23aecd859a50bf4aef8e776548d0f6e5f Mon Sep 17 00:00:00 2001 From: Samuel Boehm Date: Mon, 23 Dec 2024 00:13:27 +0100 Subject: [PATCH 5/7] added some description to the datasets --- moabb/datasets/BEETL.py | 47 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/moabb/datasets/BEETL.py b/moabb/datasets/BEETL.py index 11daee1d2..bb880f1d8 100644 --- a/moabb/datasets/BEETL.py +++ b/moabb/datasets/BEETL.py @@ -23,15 +23,36 @@ class BEETLA(BaseDataset): In the leaderboard phase, this includes subjects 1-2, while in the final phase it includes subjects 1-3. + Note: for the BEETL competition, there was a leaderboard phase and a final phase. Both phases + contained data from two datasets, A and B. However, during leaderboard phase, dataset A contained + data from subjects 1-2, while dataset B contained data from subjects 3-5. During the final phase, + dataset A contained data from subjects 1-3, while dataset B contained data from subjects 4-5. + + For the leaderboard phase, the dataset contains only training data, while for the final phase it + includes both training and testing data. To learn more about the datasets in detail see [1]. + To lern more about the competition see [2]. + + For benchmarking the BEETL competition use phase "final", train on training data benchmark on testing data. + + + Data is sampled at 500 Hz and contains 63 EEG channels. The data underwent frequency-domain preprocessing + using a bandpass filter (1-100 Hz) and a 50 Hz notch filter to attenuate power line interference. + Motor imagery tasks include: - Rest (label 0) - Left hand (label 1) - Right hand (label 2) - Feet (label 3) + + Attributes + ---------- + phase : str + Either "leaderboard" or "final" References ---------- - .. [1] Original dataset: https://www.kaggle.com/competitions/beetl + .. [1] Original dataset: https://github.com/XiaoxiWei/NeurIPS_BEETL + .. [2] Competition: https://beetl.ai/introduction """ def __init__(self, phase="final"): @@ -200,7 +221,21 @@ class BEETLB(BaseDataset): Dataset B contains data from subjects with 200 Hz sampling rate and 32 EEG channels. In the leaderboard phase, this includes subjects 3-5, while in the final phase it includes - subjects 4-5. + subjects 4-5. + + Note: for the BEETL competition, there was a leaderboard phase and a final phase. Both phases + contained data from two datasets, A and B. 
However, during leaderboard phase, dataset A contained + data from subjects 1-2, while dataset B contained data from subjects 3-5. During the final phase, + dataset A contained data from subjects 1-3, while dataset B contained data from subjects 4-5. + + For the leaderboard phase, the dataset contains only training data, while for the final phase it + includes both training and testing data. To learn more about the datasets in detail see [1]. + To lern more about the competition see [2]. + + For benchmarking the BEETL competition use phase "final", train on training data benchmark on testing data. + + The data was filtered using a highpass filter with a cutoff frequency of 1 Hz and a + lowpass filter with a cutoff frequency of 100 Hz. Motor imagery tasks include: - Left hand (label 0) @@ -208,9 +243,15 @@ class BEETLB(BaseDataset): - Feet (label 2) - Rest (label 3) + Attributes + ---------- + phase : str + Either "leaderboard" or "final" + References ---------- - .. [1] Original dataset: https://www.kaggle.com/competitions/beetl + .. [1] Original dataset: https://github.com/XiaoxiWei/NeurIPS_BEETL + .. [2] Competition: https://beetl.ai/introduction """ def __init__(self, phase="final"): From 20c320d5033d66c7eb0ccef0f60e0335641ca64c Mon Sep 17 00:00:00 2001 From: Samuel Boehm Date: Mon, 23 Dec 2024 00:19:49 +0100 Subject: [PATCH 6/7] add more info to descirption --- moabb/datasets/BEETL.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/moabb/datasets/BEETL.py b/moabb/datasets/BEETL.py index bb880f1d8..306612d62 100644 --- a/moabb/datasets/BEETL.py +++ b/moabb/datasets/BEETL.py @@ -27,6 +27,10 @@ class BEETLA(BaseDataset): contained data from two datasets, A and B. However, during leaderboard phase, dataset A contained data from subjects 1-2, while dataset B contained data from subjects 3-5. During the final phase, dataset A contained data from subjects 1-3, while dataset B contained data from subjects 4-5. + + Note: for the competition the data is cut into 4 second trials, here the data is concatenated + into one session! In order to get the data as provided in the competition, the data has to be + cut into 4 second trials. For the leaderboard phase, the dataset contains only training data, while for the final phase it includes both training and testing data. To learn more about the datasets in detail see [1]. @@ -228,6 +232,10 @@ class BEETLB(BaseDataset): data from subjects 1-2, while dataset B contained data from subjects 3-5. During the final phase, dataset A contained data from subjects 1-3, while dataset B contained data from subjects 4-5. + Note: for the competition the data is cut into 4 second trials, here the data is concatenated + into one session! In order to get the data as provided in the competition, the data has to be + cut into 4 second trials. + For the leaderboard phase, the dataset contains only training data, while for the final phase it includes both training and testing data. To learn more about the datasets in detail see [1]. To lern more about the competition see [2]. 
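The docstrings above describe the intended benchmarking workflow: instantiate the dataset with phase="final", train on the training runs, and evaluate on the held-out test runs. A rough usage sketch with MOABB's MotorImagery paradigm is given below; the run names "0train" / "1test" come from _get_single_subject_data in these patches, while the exact shape of the paradigm metadata is an assumption and may need adjusting to the final API.

# Hedged usage sketch for the "final" phase train/test split described above.
from moabb.datasets import BEETLA
from moabb.paradigms import MotorImagery

dataset = BEETLA(phase="final")        # final phase: subjects 1-3
paradigm = MotorImagery(n_classes=4)   # rest / left_hand / right_hand / feet

# X: (n_trials, n_channels, n_samples), y: labels, metadata: one row per trial
X, y, metadata = paradigm.get_data(dataset=dataset, subjects=[1])

# Split by the run names set in _get_single_subject_data
train_idx = (metadata["run"] == "0train").to_numpy()
test_idx = (metadata["run"] == "1test").to_numpy()
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]
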
From 4e2d1131c26b0bcfe75ef1c9d43cb184d53ba00a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:28:07 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks --- moabb/datasets/BEETL.py | 349 ++++++++++++++++++++++++------------- moabb/datasets/__init__.py | 2 +- 2 files changed, 233 insertions(+), 118 deletions(-) diff --git a/moabb/datasets/BEETL.py b/moabb/datasets/BEETL.py index 306612d62..fac8d443e 100644 --- a/moabb/datasets/BEETL.py +++ b/moabb/datasets/BEETL.py @@ -1,12 +1,15 @@ import logging import os from pathlib import Path + import mne import numpy as np import pooch + +import moabb.datasets.download as dl + from .base import BaseDataset from .download import get_dataset_path -import moabb.datasets.download as dl LOGGER = logging.getLogger(__name__) @@ -16,9 +19,10 @@ FINAL_EVALUATION_ARTICLE_ID = 16586213 FINAL_LABEL_TXT_ARTICLE_ID = 21602622 + class BEETLA(BaseDataset): """Motor Imagery dataset from BEETL Competition - Dataset A. - + Dataset A contains data from subjects with 500 Hz sampling rate and 63 EEG channels. In the leaderboard phase, this includes subjects 1-2, while in the final phase it includes subjects 1-3. @@ -31,7 +35,7 @@ class BEETLA(BaseDataset): Note: for the competition the data is cut into 4 second trials, here the data is concatenated into one session! In order to get the data as provided in the competition, the data has to be cut into 4 second trials. - + For the leaderboard phase, the dataset contains only training data, while for the final phase it includes both training and testing data. To learn more about the datasets in detail see [1]. To lern more about the competition see [2]. @@ -44,14 +48,14 @@ class BEETLA(BaseDataset): Motor imagery tasks include: - Rest (label 0) - - Left hand (label 1) + - Left hand (label 1) - Right hand (label 2) - Feet (label 3) - + Attributes ---------- phase : str - Either "leaderboard" or "final" + Either "leaderboard" or "final" References ---------- @@ -61,7 +65,7 @@ class BEETLA(BaseDataset): def __init__(self, phase="final"): """Initialize BEETL Dataset A. 
- + Parameters ---------- phase : str @@ -69,30 +73,83 @@ def __init__(self, phase="final"): """ if phase not in ["leaderboard", "final"]: raise ValueError("Phase must be either 'leaderboard' or 'final'") - + self.phase = phase subjects = list(range(1, 3)) if phase == "leaderboard" else list(range(1, 4)) - # Channel setup - self.ch_names = ['Fp1', 'Fz', 'F3', 'F7', 'FT9', 'FC5', 'FC1', 'C3', 'T7', 'TP9', - 'CP5', 'CP1', 'Pz', 'P3', 'P7', 'O1', 'Oz', 'O2', 'P4', 'P8', - 'TP10', 'CP6', 'CP2', 'C4', 'T8', 'FT10', 'FC6', 'FC2', 'F4', - 'F8', 'Fp2', 'AF7', 'AF3', 'AFz', 'F1', 'F5', 'FT7', 'FC3', - 'FCz', 'C1', 'C5', 'TP7', 'CP3', 'P1', 'P5', 'PO7', 'PO3', - 'POz', 'PO4', 'PO8', 'P6', 'P2', 'CPz', 'CP4', 'TP8', 'C6', - 'C2', 'FC4', 'FT8', 'F6', 'F2', 'AF4', 'AF8'] - + # Channel setup + self.ch_names = [ + "Fp1", + "Fz", + "F3", + "F7", + "FT9", + "FC5", + "FC1", + "C3", + "T7", + "TP9", + "CP5", + "CP1", + "Pz", + "P3", + "P7", + "O1", + "Oz", + "O2", + "P4", + "P8", + "TP10", + "CP6", + "CP2", + "C4", + "T8", + "FT10", + "FC6", + "FC2", + "F4", + "F8", + "Fp2", + "AF7", + "AF3", + "AFz", + "F1", + "F5", + "FT7", + "FC3", + "FCz", + "C1", + "C5", + "TP7", + "CP3", + "P1", + "P5", + "PO7", + "PO3", + "POz", + "PO4", + "PO8", + "P6", + "P2", + "CPz", + "CP4", + "TP8", + "C6", + "C2", + "FC4", + "FT8", + "F6", + "F2", + "AF4", + "AF8", + ] + self.sfreq = 500 super().__init__( subjects=subjects, sessions_per_subject=1, # Data is concatenated into one session - events=dict( - rest=0, - left_hand=1, - right_hand=2, - feet=3 - ), + events=dict(rest=0, left_hand=1, right_hand=2, feet=3), code="beetl", interval=[0, 4], # 4s trial window paradigm="imagery", @@ -101,70 +158,81 @@ def __init__(self, phase="final"): def _get_single_subject_data(self, subject): """Return data for a single subject.""" file_paths = self.data_path(subject) - + # Create MNE info - info = mne.create_info(ch_names=self.ch_names, sfreq=self.sfreq, ch_types=['eeg'] * len(self.ch_names)) - + info = mne.create_info( + ch_names=self.ch_names, + sfreq=self.sfreq, + ch_types=["eeg"] * len(self.ch_names), + ) + phase_str = "leaderboardMI" if self.phase == "leaderboard" else "finalMI" - subject_dir = Path(file_paths[0]) / phase_str / phase_str / f'S{subject}' - + subject_dir = Path(file_paths[0]) / phase_str / phase_str / f"S{subject}" + data_list = [] labels_list = [] - + # Load training data for race in range(1, 6): - data_file = subject_dir / 'training' / f'race{race}_padsData.npy' - label_file = subject_dir / 'training' / f'race{race}_padsLabel.npy' + data_file = subject_dir / "training" / f"race{race}_padsData.npy" + label_file = subject_dir / "training" / f"race{race}_padsLabel.npy" if data_file.exists() and label_file.exists(): data_list.append(np.load(data_file, allow_pickle=True)) labels_list.append(np.load(label_file, allow_pickle=True)) - + data = np.concatenate(data_list) labels = np.concatenate(labels_list) - + # Create events array - events = np.column_stack(( - np.arange(0, len(labels) * data.shape[-1], data.shape[-1]), - np.zeros(len(labels), dtype=int), - labels - )) - + events = np.column_stack( + ( + np.arange(0, len(labels) * data.shape[-1], data.shape[-1]), + np.zeros(len(labels), dtype=int), + labels, + ) + ) + # Create Raw object event_desc = {int(code): name for name, code in self.event_id.items()} raw = mne.io.RawArray(np.hstack(data), info) - raw.set_annotations(mne.annotations_from_events( - events=events, - event_desc=event_desc, - sfreq=self.sfreq - )) + raw.set_annotations( + mne.annotations_from_events( + 
events=events, event_desc=event_desc, sfreq=self.sfreq + ) + ) - # Load test data test_data_list = [] for race in range(6, 16): - data_file = subject_dir / 'testing' / f'race{race}_padsData.npy' + data_file = subject_dir / "testing" / f"race{race}_padsData.npy" if data_file.exists(): test_data_list.append(np.load(data_file, allow_pickle=True)) test_data = np.concatenate(test_data_list) - - # load labels from .txt - test_labels = np.loadtxt(Path(file_paths[0]) / 'final_MI_label.txt', dtype=int) - subject_labels = test_labels[(subject-1)*test_data.shape[0]:(subject)*test_data.shape[0]] - test_events = np.column_stack(( - np.arange(0, len(subject_labels) * test_data.shape[-1], test_data.shape[-1]), - np.zeros(len(subject_labels), dtype=int), - subject_labels - )) + # load labels from .txt + test_labels = np.loadtxt(Path(file_paths[0]) / "final_MI_label.txt", dtype=int) + subject_labels = test_labels[ + (subject - 1) * test_data.shape[0] : (subject) * test_data.shape[0] + ] + + test_events = np.column_stack( + ( + np.arange( + 0, len(subject_labels) * test_data.shape[-1], test_data.shape[-1] + ), + np.zeros(len(subject_labels), dtype=int), + subject_labels, + ) + ) # Create Raw object test_raw = mne.io.RawArray(np.hstack(test_data), info) - test_raw.set_annotations(mne.annotations_from_events( - events=test_events, - event_desc=event_desc, - sfreq=self.sfreq - )) + test_raw.set_annotations( + mne.annotations_from_events( + events=test_events, event_desc=event_desc, sfreq=self.sfreq + ) + ) return {"0": {"0train": raw, "1test": test_raw}} @@ -176,8 +244,7 @@ def data_path( raise ValueError(f"Subject {subject} not in {self.subject_list}") path = get_dataset_path("BEETL", path) - base_path = Path(os.path.join(path, f"MNE-{self.code:s}-data") -) + base_path = Path(os.path.join(path, f"MNE-{self.code:s}-data")) # Create the directory if it doesn't exist base_path.mkdir(parents=True, exist_ok=True) @@ -206,7 +273,9 @@ def data_path( for file_name in id_file_list.keys(): fpath = base_path / file_name - if (not fpath.exists() or force_update) and file_name == "final_MI_label.txt": + if ( + not fpath.exists() or force_update + ) and file_name == "final_MI_label.txt": fpath = base_path / file_name if not fpath.exists() or force_update: pooch.retrieve( @@ -222,10 +291,10 @@ def data_path( class BEETLB(BaseDataset): """Motor Imagery dataset from BEETL Competition - Dataset B. - + Dataset B contains data from subjects with 200 Hz sampling rate and 32 EEG channels. In the leaderboard phase, this includes subjects 3-5, while in the final phase it includes - subjects 4-5. + subjects 4-5. Note: for the BEETL competition, there was a leaderboard phase and a final phase. Both phases contained data from two datasets, A and B. However, during leaderboard phase, dataset A contained @@ -235,7 +304,7 @@ class BEETLB(BaseDataset): Note: for the competition the data is cut into 4 second trials, here the data is concatenated into one session! In order to get the data as provided in the competition, the data has to be cut into 4 second trials. - + For the leaderboard phase, the dataset contains only training data, while for the final phase it includes both training and testing data. To learn more about the datasets in detail see [1]. To lern more about the competition see [2]. 
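The event construction in the surrounding hunks turns per-trial arrays into one continuous recording: trials are laid end to end with np.hstack and an annotation is placed at the first sample of every trial. A small self-contained illustration with made-up shapes (not part of the dataset code) is shown here.

# Illustration only: how trial-wise arrays become a continuous signal plus events.
import numpy as np

n_trials, n_channels, n_samples = 10, 63, 2000              # 4 s at 500 Hz (dataset A)
trials = np.random.randn(n_trials, n_channels, n_samples)   # per-trial data
labels = np.random.randint(0, 4, size=n_trials)             # one label per trial

continuous = np.hstack(trials)                               # shape (63, 10 * 2000)
onsets = np.arange(0, n_trials * n_samples, n_samples)       # 0, 2000, 4000, ...
events = np.column_stack((onsets, np.zeros(n_trials, dtype=int), labels))
# mne.annotations_from_events(..., sfreq=500) then converts these sample
# onsets into onsets in seconds: 0.0, 4.0, 8.0, ...
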
@@ -254,7 +323,7 @@ class BEETLB(BaseDataset): Attributes ---------- phase : str - Either "leaderboard" or "final" + Either "leaderboard" or "final" References ---------- @@ -264,7 +333,7 @@ class BEETLB(BaseDataset): def __init__(self, phase="final"): """Initialize BEETL Dataset B. - + Parameters ---------- phase : str @@ -272,81 +341,125 @@ def __init__(self, phase="final"): """ if phase not in ["leaderboard", "final"]: raise ValueError("Phase must be either 'leaderboard' or 'final'") - + self.phase = phase subjects = list(range(3, 6)) if phase == "leaderboard" else list(range(4, 6)) - + super().__init__( subjects=subjects, sessions_per_subject=1, # Data is concatenated into one session - events=dict( - left_hand=0, - right_hand=1, - feet=2, - rest=3 - ), + events=dict(left_hand=0, right_hand=1, feet=2, rest=3), code="beetl", interval=[0, 4], # 4s trial window paradigm="imagery", ) - self.ch_names = ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'FC5', 'FC1', 'FC2', 'FC6', - 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', - 'CPz', 'CP2', 'CP4', 'CP6', 'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', - 'P4', 'P6', 'P8'] + self.ch_names = [ + "Fp1", + "Fp2", + "F3", + "Fz", + "F4", + "FC5", + "FC1", + "FC2", + "FC6", + "C5", + "C3", + "C1", + "Cz", + "C2", + "C4", + "C6", + "CP5", + "CP3", + "CP1", + "CPz", + "CP2", + "CP4", + "CP6", + "P7", + "P5", + "P3", + "P1", + "Pz", + "P2", + "P4", + "P6", + "P8", + ] self.sfreq = 200 def _get_single_subject_data(self, subject): """Return data for a single subject.""" file_paths = self.data_path(subject) - - + # Create MNE info - info = mne.create_info(ch_names=self.ch_names, sfreq=self.sfreq, ch_types=['eeg'] * len(self.ch_names)) - + info = mne.create_info( + ch_names=self.ch_names, + sfreq=self.sfreq, + ch_types=["eeg"] * len(self.ch_names), + ) + # Load data phase_str = "leaderboardMI" if self.phase == "leaderboard" else "finalMI" - subject_dir = Path(file_paths[0]) / phase_str / phase_str / f'S{subject}' - + subject_dir = Path(file_paths[0]) / phase_str / phase_str / f"S{subject}" + # Load training data - train_data = np.load(subject_dir / 'training' / f'training_s{subject}X.npy', allow_pickle=True) - train_labels = np.load(subject_dir / 'training' / f'training_s{subject}y.npy', allow_pickle=True) - + train_data = np.load( + subject_dir / "training" / f"training_s{subject}X.npy", allow_pickle=True + ) + train_labels = np.load( + subject_dir / "training" / f"training_s{subject}y.npy", allow_pickle=True + ) + # Create events array - events = np.column_stack(( - np.arange(0, len(train_labels) * train_data.shape[-1], train_data.shape[-1]), - np.zeros(len(train_labels), dtype=int), - train_labels - )) - + events = np.column_stack( + ( + np.arange( + 0, len(train_labels) * train_data.shape[-1], train_data.shape[-1] + ), + np.zeros(len(train_labels), dtype=int), + train_labels, + ) + ) + # Create Raw object event_desc = {int(code): name for name, code in self.event_id.items()} raw = mne.io.RawArray(np.hstack(train_data), info) - raw.set_annotations(mne.annotations_from_events( - events=events, - event_desc=event_desc, - sfreq=self.sfreq - )) - + raw.set_annotations( + mne.annotations_from_events( + events=events, event_desc=event_desc, sfreq=self.sfreq + ) + ) + # Load test data - test_data = np.load(subject_dir / 'testing' / f'testing_s{subject}X.npy', allow_pickle=True) + test_data = np.load( + subject_dir / "testing" / f"testing_s{subject}X.npy", allow_pickle=True + ) # load labels from .txt - test_labels = np.loadtxt(Path(file_paths[0]) / 
'final_MI_label.txt', dtype=int) - subject_labels = test_labels[(subject-1)*test_data.shape[0]:(subject)*test_data.shape[0]] - - test_events = np.column_stack(( - np.arange(0, len(subject_labels) * test_data.shape[-1], test_data.shape[-1]), - np.zeros(len(subject_labels), dtype=int), - subject_labels - )) + test_labels = np.loadtxt(Path(file_paths[0]) / "final_MI_label.txt", dtype=int) + subject_labels = test_labels[ + (subject - 1) * test_data.shape[0] : (subject) * test_data.shape[0] + ] + + test_events = np.column_stack( + ( + np.arange( + 0, len(subject_labels) * test_data.shape[-1], test_data.shape[-1] + ), + np.zeros(len(subject_labels), dtype=int), + subject_labels, + ) + ) # Create Raw object test_raw = mne.io.RawArray(np.hstack(test_data), info) - test_raw.set_annotations(mne.annotations_from_events( - events=test_events, - event_desc=event_desc, - sfreq=self.sfreq - )) + test_raw.set_annotations( + mne.annotations_from_events( + events=test_events, event_desc=event_desc, sfreq=self.sfreq + ) + ) return {"0": {"0train": raw, "1test": test_raw}} @@ -380,7 +493,7 @@ def data_path( processor=pooch.Unzip(extract_dir=os.path.splitext(file_name)[0]), downloader=pooch.HTTPDownloader(progressbar=True), ) - + # Download labels for final phase if self.phase == "final": file_list = dl.fs_get_file_list(FINAL_LABEL_TXT_ARTICLE_ID) @@ -389,7 +502,9 @@ def data_path( for file_name in id_file_list.keys(): fpath = base_path / file_name - if (not fpath.exists() or force_update) and file_name == "final_MI_label.txt": + if ( + not fpath.exists() or force_update + ) and file_name == "final_MI_label.txt": fpath = base_path / file_name if not fpath.exists() or force_update: pooch.retrieve( diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py index 6588798b7..393125f3d 100644 --- a/moabb/datasets/__init__.py +++ b/moabb/datasets/__init__.py @@ -13,6 +13,7 @@ from .alex_mi import AlexMI from .alphawaves import Rodrigues2017 from .bbci_eeg_fnirs import Shin2017A, Shin2017B +from .BEETL import BEETLA, BEETLB # Depreciated datasets (will be removed in the future): from .bnci import BNCI2014001 # noqa: F401 @@ -82,7 +83,6 @@ from .utils import _init_dataset_list from .Weibo2014 import Weibo2014 from .Zhou2016 import Zhou2016 -from .BEETL import BEETLA, BEETLB # Call this last in order to make sure the dataset list is populated with
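
As noted in the BEETLA/BEETLB docstrings, the loader returns each phase as one concatenated session, so the competition's 4-second trials have to be cut back out of the continuous Raw. A hedged sketch of doing that from the annotations set in _get_single_subject_data follows; the 4 s interval, the 200 Hz rate, and the run names are taken from the patches, the rest is illustrative.

# Hedged sketch: recover 4 s trials from the concatenated Raw of dataset B.
import mne
from moabb.datasets import BEETLB

dataset = BEETLB(phase="final")
sessions = dataset.get_data(subjects=[4])        # {4: {"0": {"0train": raw, "1test": raw}}}
raw = sessions[4]["0"]["0train"]

events, event_id = mne.events_from_annotations(raw)
epochs = mne.Epochs(
    raw,
    events,
    event_id=event_id,
    tmin=0.0,
    tmax=4.0 - 1.0 / raw.info["sfreq"],          # 4 s window -> 800 samples at 200 Hz
    baseline=None,
    preload=True,
)
X = epochs.get_data()                            # (n_trials, 32, 800)
y = epochs.events[:, -1]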