New feature for removing heart artifacts from EEG or ESG data using a…

… Principal Component Analysis - Optimal Basis Sets (PCA-OBS) algorithm (#13037) Co-authored-by: Emma Bailey <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steinn Magnusson <[email protected]> Co-authored-by: Eric Larson <[email protected]> Co-authored-by: emma-bailey <[email protected]> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Daniel McCloy <[email protected]>
mne-tools · Jan 20, 2025 · 96fb55d · 96fb55d
1 parent 388a426
commit 96fb55d
Show file tree

Hide file tree

Showing 17 changed files with 706 additions and 3 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -218,6 +218,9 @@ jobs:
       - restore_cache:
           keys:
             - data-cache-phantom-kit
+      - restore_cache:
+          keys:
+            - data-cache-ds004388
       - run:
           name: Get data
           # This limit could be increased, but this is helpful for finding slow ones
@@ -252,7 +255,7 @@ jobs:
           name: Check sphinx log for warnings (which are treated as errors)
           when: always
           command: |
-            ! grep "^.* (WARNING|ERROR): .*$" sphinx_log.txt
+            ! grep "^.*\(WARNING\|ERROR\): " sphinx_log.txt
       - run:
           name: Show profiling output
           when: always
@@ -393,6 +396,10 @@ jobs:
           key: data-cache-phantom-kit
           paths:
             - ~/mne_data/MNE-phantom-KIT-data  # (1 G)
+      - save_cache:
+          key: data-cache-ds004388
+          paths:
+            - ~/mne_data/ds004388  # (1.8 G)
 
 
   linkcheck:

diff --git a/doc/api/datasets.rst b/doc/api/datasets.rst
@@ -18,6 +18,7 @@ Datasets
    brainstorm.bst_auditory.data_path
    brainstorm.bst_resting.data_path
    brainstorm.bst_raw.data_path
+   default_path
    eegbci.load_data
    eegbci.standardize
    fetch_aparc_sub_parcellation

diff --git a/doc/api/preprocessing.rst b/doc/api/preprocessing.rst
@@ -116,6 +116,7 @@ Projections:
    read_ica_eeglab
    read_fine_calibration
    write_fine_calibration
+   apply_pca_obs
 
 :py:mod:`mne.preprocessing.nirs`:
 

diff --git a/doc/changes/devel/13037.newfeature.rst b/doc/changes/devel/13037.newfeature.rst
@@ -0,0 +1 @@
+Add PCA-OBS preprocessing for the removal of heart-artefacts from EEG or ESG datasets via :func:`mne.preprocessing.apply_pca_obs`, by :newcontrib:`Emma Bailey` and :newcontrib:`Steinn Hauser Magnusson`.
diff --git a/doc/changes/names.inc b/doc/changes/names.inc
@@ -73,6 +73,7 @@
 .. _Eberhard Eich: https://github.com/ebeich
 .. _Eduard Ort: https://github.com/eort
 .. _Emily Stephen: https://github.com/emilyps14
+.. _Emma Bailey: https://www.cbs.mpg.de/employees/bailey
 .. _Enrico Varano: https://github.com/enricovara/
 .. _Enzo Altamiranda: https://www.linkedin.com/in/enzoalt
 .. _Eric Larson: https://larsoner.com
@@ -284,6 +285,7 @@
 .. _Stanislas Chambon: https://github.com/Slasnista
 .. _Stefan Appelhoff: https://stefanappelhoff.com
 .. _Stefan Repplinger: https://github.com/stfnrpplngr
+.. _Steinn Hauser Magnusson: https://github.com/steinnhauser
 .. _Steven Bethard: https://github.com/bethard
 .. _Steven Bierer: https://github.com/neurolaunch
 .. _Steven Gutstein: https://github.com/smgutstein

diff --git a/doc/conf.py b/doc/conf.py
@@ -355,6 +355,7 @@
     "n_frequencies",
     "n_tests",
     "n_samples",
+    "n_peaks",
     "n_permutations",
     "nchan",
     "n_points",

diff --git a/doc/references.bib b/doc/references.bib
@@ -1335,6 +1335,16 @@ @inproceedings{NdiayeEtAl2016
   year = {2016}
 }
 
+@article{NiazyEtAl2005,
+  author = {Niazy, R. K. and Beckmann, C.F. and Iannetti, G.D. and Brady, J. M. and Smith, S. M.},
+  title = {Removal of FMRI environment artifacts from EEG data using optimal basis sets},
+  journal = {NeuroImage},
+  year = {2005},
+  volume = {28},
+  pages = {720-737},
+  doi = {10.1016/j.neuroimage.2005.06.067.}
+}
+
 @article{NicholsHolmes2002,
   author = {Nichols, Thomas E. and Holmes, Andrew P.},
   doi = {10.1002/hbm.1058},

diff --git a/examples/preprocessing/esg_rm_heart_artefact_pcaobs.py b/examples/preprocessing/esg_rm_heart_artefact_pcaobs.py
@@ -0,0 +1,196 @@
+"""
+.. _ex-pcaobs:
+
+=====================================================================================
+Principal Component Analysis - Optimal Basis Sets (PCA-OBS) removing cardiac artefact
+=====================================================================================
+
+This script shows an example of how to use an adaptation of PCA-OBS
+:footcite:`NiazyEtAl2005`. PCA-OBS was originally designed to remove
+the ballistocardiographic artefact in simultaneous EEG-fMRI. Here, it
+has been adapted to remove the delay between the detected R-peak and the
+ballistocardiographic artefact such that the algorithm can be applied to
+remove the cardiac artefact in EEG (electroencephalography) and ESG
+(electrospinography) data. We will illustrate how it works by applying the
+algorithm to ESG data, where the effect of removal is most pronounced.
+
+See: https://www.biorxiv.org/content/10.1101/2024.09.05.611423v1
+for more details on the dataset and application for ESG data.
+
+"""
+
+# Authors: Emma Bailey <[email protected]>,
+#          Steinn Hauser Magnusson <[email protected]>
+# License: BSD-3-Clause
+# Copyright the MNE-Python contributors.
+
+import glob
+
+import numpy as np
+
+# %%
+# Download sample subject data from OpenNeuro if you haven't already.
+# This will download simultaneous EEG and ESG data from a single run of a
+# single participant after median nerve stimulation of the left wrist.
+import openneuro
+from matplotlib import pyplot as plt
+
+import mne
+from mne import Epochs, events_from_annotations
+from mne.io import read_raw_eeglab
+from mne.preprocessing import find_ecg_events, fix_stim_artifact
+
+# add the path where you want the OpenNeuro data downloaded. Each run is ~2GB of data
+ds = "ds004388"
+target_dir = mne.datasets.default_path() / ds
+run_name = "sub-001/eeg/*median_run-03_eeg*.set"
+if not glob.glob(str(target_dir / run_name)):
+    target_dir.mkdir(exist_ok=True)
+    openneuro.download(dataset=ds, target_dir=target_dir, include=run_name[:-4])
+block_files = glob.glob(str(target_dir / run_name))
+assert len(block_files) == 1
+
+# %%
+# Define the esg channels (arranged in two patches over the neck and lower back).
+
+esg_chans = [
+    "S35",
+    "S24",
+    "S36",
+    "Iz",
+    "S17",
+    "S15",
+    "S32",
+    "S22",
+    "S19",
+    "S26",
+    "S28",
+    "S9",
+    "S13",
+    "S11",
+    "S7",
+    "SC1",
+    "S4",
+    "S18",
+    "S8",
+    "S31",
+    "SC6",
+    "S12",
+    "S16",
+    "S5",
+    "S30",
+    "S20",
+    "S34",
+    "S21",
+    "S25",
+    "L1",
+    "S29",
+    "S14",
+    "S33",
+    "S3",
+    "L4",
+    "S6",
+    "S23",
+]
+
+# Interpolation window in seconds for ESG data to remove stimulation artefact
+tstart_esg = -7e-3
+tmax_esg = 7e-3
+
+# Define timing of heartbeat epochs in seconds relative to R-peaks
+iv_baseline = [-400e-3, -300e-3]
+iv_epoch = [-400e-3, 600e-3]
+
+# %%
+# Next, we perform minimal preprocessing including removing the
+# stimulation artefact, downsampling and filtering.
+
+raw = read_raw_eeglab(block_files[0], verbose="error")
+raw.set_channel_types(dict(ECG="ecg"))
+# Isolate the ESG channels (include the ECG channel for R-peak detection)
+raw.pick(esg_chans + ["ECG"])
+# Trim duration and downsample (from 10kHz) to improve example speed
+raw.crop(0, 60).load_data().resample(2000)
+
+# Find trigger timings to remove the stimulation artefact
+events, event_dict = events_from_annotations(raw)
+trigger_name = "Median - Stimulation"
+
+fix_stim_artifact(
+    raw,
+    events=events,
+    event_id=event_dict[trigger_name],
+    tmin=tstart_esg,
+    tmax=tmax_esg,
+    mode="linear",
+    stim_channel=None,
+)
+
+# %%
+# Find ECG events and add to the raw structure as event annotations.
+
+ecg_events, ch_ecg, average_pulse = find_ecg_events(raw, ch_name="ECG")
+ecg_event_samples = np.asarray(
+    [[ecg_event[0] for ecg_event in ecg_events]]
+)  # Samples only
+
+qrs_event_time = [
+    x / raw.info["sfreq"] for x in ecg_event_samples.reshape(-1)
+]  # Divide by sampling rate to make times
+duration = np.repeat(0.0, len(ecg_event_samples))
+description = ["qrs"] * len(ecg_event_samples)
+
+raw.annotations.append(
+    qrs_event_time, duration, description, ch_names=[esg_chans] * len(qrs_event_time)
+)
+
+# %%
+# Create evoked response around the detected R-peaks
+# before and after cardiac artefact correction.
+
+events, event_ids = events_from_annotations(raw)
+event_id_dict = {key: value for key, value in event_ids.items() if key == "qrs"}
+epochs = Epochs(
+    raw,
+    events,
+    event_id=event_id_dict,
+    tmin=iv_epoch[0],
+    tmax=iv_epoch[1],
+    baseline=tuple(iv_baseline),
+)
+evoked_before = epochs.average()
+
+# Apply function - modifies the data in place. Optionally high-pass filter
+# the data before applying PCA-OBS to remove low frequency drifts
+raw = mne.preprocessing.apply_pca_obs(
+    raw, picks=esg_chans, n_jobs=5, qrs_times=raw.times[ecg_event_samples.reshape(-1)]
+)
+
+epochs = Epochs(
+    raw,
+    events,
+    event_id=event_id_dict,
+    tmin=iv_epoch[0],
+    tmax=iv_epoch[1],
+    baseline=tuple(iv_baseline),
+)
+evoked_after = epochs.average()
+
+# %%
+# Compare evoked responses to assess completeness of artefact removal.
+
+fig, axes = plt.subplots(1, 1, layout="constrained")
+data_before = evoked_before.get_data(units=dict(eeg="uV")).T
+data_after = evoked_after.get_data(units=dict(eeg="uV")).T
+hs = list()
+hs.append(axes.plot(epochs.times, data_before, color="k")[0])
+hs.append(axes.plot(epochs.times, data_after, color="green", label="after")[0])
+axes.set(ylim=[-500, 1000], ylabel="Amplitude (µV)", xlabel="Time (s)")
+axes.set(title="ECG artefact removal using PCA-OBS")
+axes.legend(hs, ["before", "after"])
+plt.show()
+
+# %%
+# References
+# ----------
+# .. footbibliography::
diff --git a/mne/datasets/__init__.pyi b/mne/datasets/__init__.pyi
@@ -6,6 +6,7 @@ __all__ = [
     "epilepsy_ecog",
     "erp_core",
     "eyelink",
+    "default_path",
     "fetch_aparc_sub_parcellation",
     "fetch_dataset",
     "fetch_fsaverage",
@@ -70,6 +71,7 @@ from ._infant import fetch_infant_template
 from ._phantom.base import fetch_phantom
 from .utils import (
     _download_all_example_data,
+    default_path,
     fetch_aparc_sub_parcellation,
     fetch_hcp_mmp_parcellation,
     has_dataset,

diff --git a/mne/datasets/utils.py b/mne/datasets/utils.py
@@ -2,6 +2,7 @@
 # License: BSD-3-Clause
 # Copyright the MNE-Python contributors.
 
+import glob
 import importlib
 import inspect
 import logging
@@ -92,6 +93,22 @@ def _dataset_version(path, name):
     return version
 
 
+@verbose
+def default_path(*, verbose=None):
+    """Get the default MNE_DATA path.
+
+    Parameters
+    ----------
+    %(verbose)s
+
+    Returns
+    -------
+    data_path : instance of Path
+        Path to the default MNE_DATA directory.
+    """
+    return _get_path(None, None, None)
+
+
 def _get_path(path, key, name):
     """Get a dataset path."""
     # 1. Input
@@ -113,7 +130,8 @@ def _get_path(path, key, name):
         return path
     # 4. ~/mne_data (but use a fake home during testing so we don't
     #    unnecessarily create ~/mne_data)
-    logger.info(f"Using default location ~/mne_data for {name}...")
+    extra = f" for {name}" if name else ""
+    logger.info(f"Using default location ~/mne_data{extra}...")
     path = Path(os.getenv("_MNE_FAKE_HOME_DIR", "~")).expanduser() / "mne_data"
     if not path.is_dir():
         logger.info(f"Creating {path}")
@@ -319,6 +337,8 @@ def _download_all_example_data(verbose=True):
     #
     # verbose=True by default so we get nice status messages.
     # Consider adding datasets from here to CircleCI for PR-auto-build
+    import openneuro
+
     paths = dict()
     for kind in (
         "sample testing misc spm_face somato hf_sef multimodal "
@@ -375,6 +395,14 @@ def _download_all_example_data(verbose=True):
     limo.load_data(subject=1, update_path=True)
     logger.info("[done limo]")
 
+    # for ESG
+    ds = "ds004388"
+    target_dir = default_path() / ds
+    run_name = "sub-001/eeg/*median_run-03_eeg*.set"
+    if not glob.glob(str(target_dir / run_name)):
+        target_dir.mkdir(exist_ok=True)
+        openneuro.download(dataset=ds, target_dir=target_dir, include=run_name[:-4])
+
 
 @verbose
 def fetch_aparc_sub_parcellation(subjects_dir=None, verbose=None):

diff --git a/mne/preprocessing/__init__.pyi b/mne/preprocessing/__init__.pyi
@@ -44,6 +44,7 @@ __all__ = [
     "realign_raw",
     "regress_artifact",
     "write_fine_calibration",
+    "apply_pca_obs",
 ]
 from . import eyetracking, ieeg, nirs
 from ._annotate_amplitude import annotate_amplitude
@@ -56,6 +57,7 @@ from ._fine_cal import (
     write_fine_calibration,
 )
 from ._lof import find_bad_channels_lof
+from ._pca_obs import apply_pca_obs
 from ._peak_finder import peak_finder
 from ._regress import EOGRegression, read_eog_regression, regress_artifact
 from .artifact_detection import (
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Add PCA-OBS preprocessing for the removal of heart-artefacts from EEG or ESG datasets via :func:`mne.preprocessing.apply_pca_obs`, by :newcontrib:`Emma Bailey` and :newcontrib:`Steinn Hauser Magnusson`.