diff --git a/.codeclimate.yml b/.codeclimate.yml
deleted file mode 100644
index c4946e57d8..0000000000
--- a/.codeclimate.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-engines:
-  pep8:
-    enabled: true
-    checks:
-      E501:
-        enabled: false
-      E203:
-        enabled: false
-      E302:
-        enabled: false
-      E303:
-        enabled: false
-
-checks:
-  argument-count:
-    enabled: false
-  complex-logic:
-    enabled: false
-  file-lines:
-    enabled: false
-  method-complexity:
-    enabled: false
-  method-count:
-    enabled: false
-  method-lines:
-    enabled: false
-  nested-control-flow:
-    enabled: true
-  return-statements:
-    enabled: true
-  similar-code:
-    enabled: false
-  identical-code:
-    enabled: false
-
-exclude_patterns:
-  - "tests/"
-  - "scripts/"
-  - "benchmarks/"
-  - "studies/"
-  - "data/"
-  - "docs/img/"
-  - "docs/readme/"
diff --git a/.gitignore b/.gitignore
index 6d9fdf82d3..d222e24396 100644
--- a/.gitignore
+++ b/.gitignore
@@ -215,3 +215,4 @@ studies/complexity_structure/README_cache/
 studies/complexity_structure/manuscript_cache/
 studies/complexity_eeg/data_attractor.csv
 studies/complexity_eeg/data_delay.csv
+testing.ipynb
diff --git a/MANIFEST.in b/MANIFEST.in
index 1e49636c11..d476c8d8ed 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,8 +4,9 @@ include NEWS.rst
 include LICENSE
 include README.rst
 
-recursive-include tests *
+recursive-exclude tests *
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
 
 recursive-include docs *.rst conf.py *.jpg *.png *.gif
+
diff --git a/NEWS.rst b/NEWS.rst
index 0cfd6a1047..cb1a79a668 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -1,6 +1,17 @@
 News
 =====
 
+0.2.8
+-------------------
+New Features
++++++++++++++
+
+* `events_find()` is now able to combine multiple digital input channels,
+  retrieve events from the combined events channel, and differentiate between
+  inputs that occur simultaneously.
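As a quick illustration of the behaviour this entry describes (a minimal sketch mirroring the `events_find()` examples and tests added later in this diff; each input channel `i` contributes the bit `2**i` to the combined channel):

```python
import numpy as np
import neurokit2 as nk

# Two binary input channels: channel 1 fires alone at t = 10,
# and together with channel 2 at t = 60.
channel1 = np.zeros(200)
channel1[10:30] = 1
channel1[60:80] = 1

channel2 = np.zeros(200)
channel2[60:80] = 1

events = nk.events_find([channel1, channel2])
print(events["onset"])      # [10 60]
print(events["duration"])   # [20 20]
print(events["condition"])  # [1, 3] -> 1 = channel 1 only, 3 = channels 1 + 2
```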
+ + + 0.2.4 ------------------- Fixes diff --git a/data/eeg_1min_200hz.pickle b/data/eeg_1min_200hz.pickle index 62d0b77b28..112ee649ac 100644 Binary files a/data/eeg_1min_200hz.pickle and b/data/eeg_1min_200hz.pickle differ diff --git a/docs/examples/ecg_hrv/ecg_hrv.ipynb b/docs/examples/ecg_hrv/ecg_hrv.ipynb index 0766e8a829..84cec5c3ce 100644 --- a/docs/examples/ecg_hrv/ecg_hrv.ipynb +++ b/docs/examples/ecg_hrv/ecg_hrv.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Heart Rate Varability (HRV)" + "# Heart Rate Variability (HRV)" ] }, { diff --git a/neurokit2/__init__.py b/neurokit2/__init__.py index 6e7a7e8790..f9ce57b81b 100644 --- a/neurokit2/__init__.py +++ b/neurokit2/__init__.py @@ -1,4 +1,5 @@ """Top-level package for NeuroKit.""" + import datetime import platform @@ -32,12 +33,12 @@ from .video import * # Info -__version__ = "0.2.7" +__version__ = "0.2.8" # Maintainer info __author__ = "The NeuroKit development team" -__email__ = "dom.makowski@gmail.com" +__email__ = "D.Makowski@sussex.ac.uk" # Citation diff --git a/neurokit2/complexity/utils_complexity_symbolize.py b/neurokit2/complexity/utils_complexity_symbolize.py index 942a78cfaa..5f86f52fbf 100644 --- a/neurokit2/complexity/utils_complexity_symbolize.py +++ b/neurokit2/complexity/utils_complexity_symbolize.py @@ -184,8 +184,8 @@ def complexity_symbolize(signal, method="mean", c=3, random_state=None, show=Fal symbolic = (signal > np.nanmean(signal)).astype(int) if show is True: df = pd.DataFrame({"A": signal, "B": signal}) - df["A"][df["A"] > np.nanmean(signal)] = np.nan - df["B"][df["B"] <= np.nanmean(signal)] = np.nan + df.loc[df["A"] > np.nanmean(signal), "A"] = np.nan + df.loc[df["B"] <= np.nanmean(signal), "B"] = np.nan df.plot() plt.axhline(y=np.nanmean(signal), color="r", linestyle="dotted") plt.title("Method A") @@ -194,8 +194,8 @@ def complexity_symbolize(signal, method="mean", c=3, random_state=None, show=Fal symbolic = (signal > np.nanmedian(signal)).astype(int) if show is True: df = pd.DataFrame({"A": signal, "B": signal}) - df["A"][df["A"] > np.nanmedian(signal)] = np.nan - df["B"][df["B"] <= np.nanmedian(signal)] = np.nan + df.loc[df["A"] > np.nanmedian(signal), "A"] = np.nan + df.loc[df["B"] <= np.nanmedian(signal), "B"] = np.nan df.plot() plt.axhline(y=np.nanmean(signal), color="r", linestyle="dotted") plt.title("Binarization by median") @@ -206,8 +206,9 @@ def complexity_symbolize(signal, method="mean", c=3, random_state=None, show=Fal symbolic = np.logical_or(signal < m - sd, signal > m + sd).astype(int) if show is True: df = pd.DataFrame({"A": signal, "B": signal}) - df["A"][np.logical_or(signal < m - sd, signal > m + sd)] = np.nan - df["B"][~np.isnan(df["A"])] = np.nan + condition = np.logical_or(signal < m - sd, signal > m + sd) + df.loc[condition, "A"] = np.nan + df.loc[~np.isnan(df["A"]), "B"] = np.nan df.plot() plt.axhline(y=m - sd, color="r", linestyle="dotted") plt.axhline(y=m + sd, color="r", linestyle="dotted") @@ -217,8 +218,8 @@ def complexity_symbolize(signal, method="mean", c=3, random_state=None, show=Fal symbolic = np.signbit(np.diff(signal)).astype(int) if show is True: df = pd.DataFrame({"A": signal, "B": signal}) - df["A"][np.insert(symbolic, 0, False)] = np.nan - df["B"][~np.isnan(df["A"])] = np.nan + df.loc[np.insert(symbolic, 0, False), "A"] = np.nan + df.loc[~np.isnan(df["A"]), "B"] = np.nan df.plot() plt.title("Method C") diff --git a/neurokit2/data/database.py b/neurokit2/data/database.py index bacb39fa68..86aa6c94b5 100644 --- 
a/neurokit2/data/database.py
+++ b/neurokit2/data/database.py
@@ -1,9 +1,8 @@
 import pathlib
 import urllib.parse
+import urllib.request
 import zipfile
 
-import requests
-
 
 def download_from_url(url, destination_path=None):
     """**Download Files from URLs**
@@ -26,7 +25,7 @@
     destination_path = _download_path_sanitize(url, destination_path)
 
     # Download the file
-    response = requests.get(url)
+    response = urllib.request.urlopen(url)
 
-    if response.status_code == 200:
+    if response.status == 200:
         with destination_path.open("wb") as file:
diff --git a/neurokit2/data/read_acqknowledge.py b/neurokit2/data/read_acqknowledge.py
index 3932ec2727..90a3df5714 100644
--- a/neurokit2/data/read_acqknowledge.py
+++ b/neurokit2/data/read_acqknowledge.py
@@ -1,15 +1,15 @@
 # -*- coding: utf-8 -*-
 import os
 
+from collections import Counter
+
 import numpy as np
 import pandas as pd
 
 from ..signal import signal_resample
 
 
-def read_acqknowledge(
-    filename, sampling_rate="max", resample_method="interpolation", impute_missing=True
-):
+def read_acqknowledge(filename, sampling_rate="max", resample_method="interpolation", impute_missing=True):
     """**Read and format a BIOPAC's AcqKnowledge file into a pandas' dataframe**
 
     The function outputs both the dataframe and the sampling rate (retrieved from the
@@ -69,10 +69,7 @@
         filename += ".acq"
 
     if os.path.exists(filename) is False:
-        raise ValueError(
-            "NeuroKit error: read_acqknowledge(): couldn't"
-            " find the following file: " + filename
-        )
+        raise ValueError("NeuroKit error: read_acqknowledge(): couldn't find the following file: " + filename)
 
     # Read file
     file = bioread.read(filename)
@@ -84,24 +81,34 @@
             freq_list.append(file.named_channels[channel].samples_per_second)
         sampling_rate = np.max(freq_list)
 
+    # Counter for checking duplicate channel names
+    channel_counter = Counter()
+
     # Loop through channels
     data = {}
-    for channel in file.named_channels:
-        signal = np.array(file.named_channels[channel].data)
+    for channel_num, channel in enumerate(file.channels):
+        signal = np.array(file.channels[channel_num].data)
 
         # Fill signal interruptions
         if impute_missing is True and np.isnan(np.sum(signal)):
             signal = pd.Series(signal).fillna(method="pad").values
 
         # Resample if necessary
-        if file.named_channels[channel].samples_per_second != sampling_rate:
+        if file.channels[channel_num].samples_per_second != sampling_rate:
             signal = signal_resample(
                 signal,
-                sampling_rate=file.named_channels[channel].samples_per_second,
+                sampling_rate=file.channels[channel_num].samples_per_second,
                 desired_sampling_rate=sampling_rate,
                 method=resample_method,
            )
-        data[channel] = signal
+
+        # If there is a duplicate channel name, append a number
+        if channel_counter[channel.name] == 0:
+            data[channel.name] = signal
+        else:
+            data[f"{channel.name} ({channel_counter[channel.name]})"] = signal
+
+        channel_counter[channel.name] += 1
 
     # Sanitize lengths
     lengths = []
diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index bb7ac9250e..35226f84a2 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -79,10 +79,16 @@
 
     # Special treatment for some devices
     if stream["info"]["name"][0] == "Muse":
-        # Rename GYRO channels and add ACCelerometer
+        # Rename GYRO channels
         if stream["info"]["type"][0] == "GYRO":
             dat = dat.rename(columns={"X": "GYRO_X", "Y": "GYRO_Y", "Z": "GYRO_Z"})
-            dat["ACC"] = np.sqrt(dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2)
+            # Compute movement
+            dat["GYRO"] = np.sqrt(dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2)
+
+        if stream["info"]["type"][0] == "ACC":
+            dat = dat.rename(columns={"X": "ACC_X", "Y": "ACC_Y", "Z": "ACC_Z"})
+            # Compute acceleration
+            dat["ACC"] = np.sqrt(dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2)
 
         # Muse - PPG data has three channels: ambient, infrared, red
         if stream["info"]["type"][0] == "PPG":
diff --git a/neurokit2/ecg/ecg_clean.py b/neurokit2/ecg/ecg_clean.py
index 7319d3ba5b..9473591083 100644
--- a/neurokit2/ecg/ecg_clean.py
+++ b/neurokit2/ecg/ecg_clean.py
@@ -13,7 +13,7 @@ def ecg_clean(ecg_signal, sampling_rate=1000, method="neurokit", **kwargs):
     """**ECG Signal Cleaning**
 
     Clean an ECG signal to remove noise and improve peak-detection accuracy. Different cleaning
-    method are implemented.
+    methods are implemented.
 
     * ``'neurokit'`` (default): 0.5 Hz high-pass butterworth filter (order = 5), followed by
       powerline filtering (see ``signal_filter()``). By default, ``powerline = 50``.
@@ -28,7 +28,9 @@ description!**
    * ``'engzeemod2012'``: Method used in Engelse & Zeelenberg (1979). **Please help providing a
      better description!**
-
+    * ``'vg'``: Method used by the visibility-graph-based detectors of Emrich et al. (2023)
+      and Koka et al. (2022): a 4.0 Hz high-pass butterworth filter (order = 2).
+
     Parameters
     ----------
@@ -39,7 +40,7 @@
     method : str
         The processing pipeline to apply. Can be one of ``"neurokit"`` (default),
         ``"biosppy"``, ``"pantompkins1985"``, ``"hamilton2002"``, ``"elgendi2010"``,
-        ``"engzeemod2012"``.
+        ``"engzeemod2012"``, ``"vg"``.
     **kwargs
         Other arguments to be passed to specific methods.
 
@@ -71,6 +72,7 @@
       "ECG_Hamilton" : nk.ecg_clean(ecg, sampling_rate=250, method="hamilton2002"),
      "ECG_Elgendi" : nk.ecg_clean(ecg, sampling_rate=250, method="elgendi2010"),
      "ECG_EngZeeMod" : nk.ecg_clean(ecg, sampling_rate=250, method="engzeemod2012"),
+      "ECG_VG" : nk.ecg_clean(ecg, sampling_rate=250, method="vg"),
      "ECG_TC" : nk.ecg_clean(ecg, sampling_rate=250, method="templateconvolution")
      })
 
@@ -91,6 +93,9 @@
     * Elgendi, M., Jonkman, M., & De Boer, F. (2010). Frequency Bands Effects on QRS Detection.
       Biosignals, Proceedings of the Third International Conference on Bio-inspired Systems and
       Signal Processing, 428-431.
+    * Emrich, J., Koka, T., Wirth, S., & Muma, M. (2023). Accelerated Sample-Accurate R-Peak
+      Detectors Based on Visibility Graphs. 31st European Signal Processing Conference
+      (EUSIPCO), 1090-1094, doi: 10.23919/EUSIPCO58844.2023.10290007
 
     """
     ecg_signal = as_vector(ecg_signal)
@@ -118,7 +123,14 @@
         clean = _ecg_clean_elgendi(ecg_signal, sampling_rate)
     elif method in ["engzee", "engzee2012", "engzeemod", "engzeemod2012"]:
         clean = _ecg_clean_engzee(ecg_signal, sampling_rate)
-    elif method in ["vg", "vgraph", "koka2022"]:
+    elif method in ["vg", "vgraph", "fastnvg", "emrich", "emrich2023"]:
+        clean = _ecg_clean_vgraph(ecg_signal, sampling_rate)
+    elif method in ["koka2022", "koka"]:
+        warn(
+            "The 'koka2022' method has been replaced by 'emrich2023'."
+ " Please replace method='koka2022' by method='emrich2023'.", + category=NeuroKitWarning, + ) clean = _ecg_clean_vgraph(ecg_signal, sampling_rate) elif method in ["templateconvolution"]: clean = _ecg_clean_templateconvolution(ecg_signal, sampling_rate) @@ -141,7 +153,7 @@ def ecg_clean(ecg_signal, sampling_rate=1000, method="neurokit", **kwargs): "NeuroKit error: ecg_clean(): 'method' should be " "one of 'neurokit', 'biosppy', 'pantompkins1985'," " 'hamilton2002', 'elgendi2010', 'engzeemod2012'," - " 'templateconvolution'." + " 'templateconvolution', 'vg'." ) return clean @@ -168,9 +180,7 @@ def _ecg_clean_nk(ecg_signal, sampling_rate=1000, **kwargs): order=5, ) - clean = signal_filter( - signal=clean, sampling_rate=sampling_rate, method="powerline", **kwargs - ) + clean = signal_filter(signal=clean, sampling_rate=sampling_rate, method="powerline", **kwargs) return clean @@ -194,9 +204,7 @@ def _ecg_clean_biosppy(ecg_signal, sampling_rate=1000): # -> get_filter() # -> _norm_freq() - frequency = ( - 2 * np.array(frequency) / sampling_rate - ) # Normalize frequency to Nyquist Frequency (Fs/2). + frequency = 2 * np.array(frequency) / sampling_rate # Normalize frequency to Nyquist Frequency (Fs/2). # -> get coeffs a = np.array([1]) @@ -305,10 +313,15 @@ def _ecg_clean_engzee(ecg_signal, sampling_rate=1000): # ============================================================================= -# Engzee Modified (2012) +# Visibility-Graph Detector - Emrich et al. (2023) & Koka et al. (2022) # ============================================================================= def _ecg_clean_vgraph(ecg_signal, sampling_rate=1000): - """Filtering used by Taulant Koka and Michael Muma (2022). + """Filtering used for Visibility-Graph Detectors Emrich et al. (2023) and Koka et al. (2022). + + - J. Emrich, T. Koka, S. Wirth and M. Muma, "Accelerated Sample-Accurate R-Peak + Detectors Based on Visibility Graphs," 31st European Signal Processing + Conference (EUSIPCO), 2023, pp. 1090-1094, doi: 10.23919/EUSIPCO58844.2023.10290007, + https://ieeexplore.ieee.org/document/10290007 - T. Koka and M. Muma (2022), Fast and Sample Accurate R-Peak Detection for Noisy ECG Using Visibility Graphs. In: 2022 44th Annual International Conference of the IEEE Engineering @@ -332,12 +345,12 @@ def _ecg_clean_vgraph(ecg_signal, sampling_rate=1000): # Template Convolution (Exploratory) # ============================================================================= def _ecg_clean_templateconvolution(ecg_signal, sampling_rate=1000): - """Filter and Convolve ECG signal with QRS complex template. - Totally exploratory method by Dominique Makowski, use at your own risks. + """Filter and Convolve ECG signal with QRS complex template. Totally exploratory method by Dominique Makowski, use + at your own risks. + + The idea is to use a QRS template to convolve the signal with, in order to magnify the QRS features. However, + it doens't work well and creates a lot of artifacts. If you have ideas for improvement please let me know! - The idea is to use a QRS template to convolve the signal with, in order to magnify the QRS - features. However, it doens't work well and creates a lot of artifacts. If you have ideas for - improvement please let me know! 
""" window_size = int(np.round(sampling_rate / 4)) @@ -355,14 +368,10 @@ def _ecg_clean_templateconvolution(ecg_signal, sampling_rate=1000): ) # Detect peaks - peaks, _ = scipy.signal.find_peaks( - filtered, distance=sampling_rate / 3, height=0.5 * np.std(filtered) - ) + peaks, _ = scipy.signal.find_peaks(filtered, distance=sampling_rate / 3, height=0.5 * np.std(filtered)) peaks = peaks[peaks + 0.6 * sampling_rate < len(ecg_signal)] - idx = [ - np.arange(p - int(sampling_rate / 2), p + int(sampling_rate / 2)) for p in peaks - ] + idx = [np.arange(p - int(sampling_rate / 2), p + int(sampling_rate / 2)) for p in peaks] epochs = np.array([filtered[i] for i in idx]) qrs = np.mean(epochs, axis=0) diff --git a/neurokit2/ecg/ecg_findpeaks.py b/neurokit2/ecg/ecg_findpeaks.py index 4b36ca281c..6a1f34e61b 100644 --- a/neurokit2/ecg/ecg_findpeaks.py +++ b/neurokit2/ecg/ecg_findpeaks.py @@ -3,6 +3,9 @@ import pandas as pd import scipy.signal import scipy.stats +from warnings import warn +from bisect import insort +from collections import deque from ..signal import ( signal_findpeaks, @@ -11,6 +14,7 @@ signal_smooth, signal_zerocrossings, ) +from ..misc import NeuroKitWarning def ecg_findpeaks( @@ -103,8 +107,15 @@ def _ecg_findpeaks_findmethod(method): return _ecg_findpeaks_WT elif method in ["rodrigues2020", "rodrigues2021", "rodrigues", "asi"]: return _ecg_findpeaks_rodrigues - elif method in ["vg", "vgraph", "koka2022"]: - return _ecg_findpeaks_vgraph + elif method in ["vg", "vgraph", "fastnvg", "emrich", "emrich2023"]: + return _ecg_findpeaks_visibilitygraph + elif method in ["koka2022", "koka"]: + warn( + "The 'koka2022' method has been replaced by 'emrich2023'." + " Please replace method='koka2022' by method='emrich2023'.", + category=NeuroKitWarning, + ) + return _ecg_findpeaks_visibilitygraph elif method in ["promac", "all"]: return _ecg_findpeaks_promac else: @@ -335,7 +346,7 @@ def _ecg_findpeaks_hamilton(signal, sampling_rate=1000, **kwargs): - Hamilton, Open Source ECG Analysis Software Documentation, E.P.Limited, 2002. 
""" - diff = abs(np.diff(signal)) + diff = np.abs(np.diff(signal)) b = np.ones(int(0.08 * sampling_rate)) b = b / int(0.08 * sampling_rate) @@ -345,12 +356,12 @@ def _ecg_findpeaks_hamilton(signal, sampling_rate=1000, **kwargs): ma[0 : len(b) * 2] = 0 - n_pks = [] + n_pks = deque([], maxlen=8) n_pks_ave = 0.0 - s_pks = [] + s_pks = deque([], maxlen=8) s_pks_ave = 0.0 QRS = [0] - RR = [] + RR = deque([], maxlen=8) RR_ave = 0.0 th = 0.0 @@ -359,47 +370,40 @@ def _ecg_findpeaks_hamilton(signal, sampling_rate=1000, **kwargs): idx = [] peaks = [] - for i in range(len(ma)): # pylint: disable=C0200,R1702 - if ( - i > 0 and i < len(ma) - 1 and ma[i - 1] < ma[i] and ma[i + 1] < ma[i] - ): # pylint: disable=R1716 + for i in range(1, len(ma) - 1): # pylint: disable=C0200,R1702 + if ma[i - 1] < ma[i] and ma[i + 1] < ma[i]: # pylint: disable=R1716 peak = i peaks.append(peak) if ma[peak] > th and (peak - QRS[-1]) > 0.3 * sampling_rate: QRS.append(peak) idx.append(peak) s_pks.append(ma[peak]) - if len(n_pks) > 8: - s_pks.pop(0) s_pks_ave = np.mean(s_pks) if RR_ave != 0.0 and QRS[-1] - QRS[-2] > 1.5 * RR_ave: missed_peaks = peaks[idx[-2] + 1 : idx[-1]] + missed_peak_added = False for missed_peak in missed_peaks: if ( missed_peak - peaks[idx[-2]] > int(0.36 * sampling_rate) and ma[missed_peak] > 0.5 * th ): - QRS.append(missed_peak) - QRS.sort() + insort(QRS, missed_peak) + missed_peak_added = True break + if missed_peak_added: + continue if len(QRS) > 2: RR.append(QRS[-1] - QRS[-2]) - if len(RR) > 8: - RR.pop(0) RR_ave = int(np.mean(RR)) else: n_pks.append(ma[peak]) - if len(n_pks) > 8: - n_pks.pop(0) n_pks_ave = np.mean(n_pks) th = n_pks_ave + 0.45 * (s_pks_ave - n_pks_ave) - i += 1 - QRS.pop(0) QRS = np.array(QRS, dtype="int") @@ -771,24 +775,24 @@ def _ecg_findpeaks_elgendi(signal, sampling_rate=1000, **kwargs): window2 = int(0.6 * sampling_rate) mwa_beat = _ecg_findpeaks_MWA(abs(signal), window2) - blocks = np.zeros(len(signal)) - block_height = np.max(signal) + blocks = mwa_qrs > mwa_beat - for i in range(len(mwa_qrs)): # pylint: disable=C0200 - blocks[i] = block_height if mwa_qrs[i] > mwa_beat[i] else 0 QRS = [] - for i in range(1, len(blocks)): - if blocks[i - 1] == 0 and blocks[i] == block_height: + qrs_duration_threshold = int(0.08 * sampling_rate) + rr_distance_threshold = int(0.3 * sampling_rate) + + for i, (prev, cur) in enumerate(zip(blocks, blocks[1:])): + if prev < cur: start = i - elif blocks[i - 1] == block_height and blocks[i] == 0: + elif prev > cur: end = i - 1 - if end - start > int(0.08 * sampling_rate): + if end - start > qrs_duration_threshold: detection = np.argmax(signal[start : end + 1]) + start if QRS: - if detection - QRS[-1] > int(0.3 * sampling_rate): + if detection - QRS[-1] > rr_distance_threshold: QRS.append(detection) else: QRS.append(detection) @@ -1119,7 +1123,7 @@ def _ecg_findpeaks_rodrigues(signal, sampling_rate=1000, **kwargs): """ - N = int(np.round(3 * sampling_rate / 128)) + N = int(np.clip(np.round(3 * sampling_rate / 128), 2, None)) Nd = N - 1 Pth = (0.7 * sampling_rate) / 128 + 2.7 # Pth = 3, optimal for fs = 250 Hz @@ -1172,18 +1176,34 @@ def _ecg_findpeaks_rodrigues(signal, sampling_rate=1000, **kwargs): # ============================================================================= -# Visibility graph transformation - by Koka and Muma (2022) +# Fast Visibility Graph Detector - by Emrich et al. 
 # =============================================================================
-def _ecg_findpeaks_vgraph(signal, sampling_rate=1000, lowcut=3, order=2, **kwargs):
-    """R-Peak Detector Using Visibility Graphs by Taulant Koka and Michael Muma (2022).
+def _ecg_findpeaks_visibilitygraph(
+    signal,
+    sampling_rate=1000,
+    window_seconds=2,
+    window_overlap=0.5,
+    accelerated=True,
+    **kwargs,
+):
+    """Accelerated R-Peak Detector Using Visibility Graphs by Emrich et al. (2023). Implements the FastNVG algorithm,
+    allowing fast and sample-precise R-peak detection.
 
     References
     ----------
+    - J. Emrich, T. Koka, S. Wirth and M. Muma, "Accelerated Sample-Accurate R-Peak
+      Detectors Based on Visibility Graphs," 31st European Signal Processing
+      Conference (EUSIPCO), 2023, pp. 1090-1094, doi: 10.23919/EUSIPCO58844.2023.10290007,
+      https://ieeexplore.ieee.org/document/10290007
+
     - T. Koka and M. Muma (2022), Fast and Sample Accurate R-Peak Detection for Noisy ECG Using
       Visibility Graphs. In: 2022 44th Annual International Conference of the IEEE Engineering
       in Medicine & Biology Society (EMBC).
 
     Uses the Pan and Tompkins thresholding.
 
+    - https://github.com/JonasEmrich/vg-beat-detectors
+
     """
+    # Try loading ts2vg
     try:
         import ts2vg
     except ImportError as import_error:
         raise ImportError(
             "NeuroKit error: ecg_findpeaks(): the 'ts2vg' module is required for"
             " this method to run. Please install it first (`pip install ts2vg`)."
         ) from import_error
 
-    N = len(signal)
-    M = 2 * sampling_rate
-    w = np.zeros(N)
-    rpeaks = []
-    beta = 0.55
-    gamma = 0.5
-    L = 0
-    R = M
-
-    # Compute number of segments
-    deltaM = int(gamma * M)
-    n_segments = int((N - deltaM) / (M - deltaM)) + 1
-
-    for segment in range(n_segments):
-        y = signal[L:R]
+    # Initialize variables
+    N = len(signal)  # Signal length
+    M = int(window_seconds * sampling_rate)  # Length of window segment
+    L = 0  # Left segment boundary
+    R = M  # Right segment boundary
+    dM = int(np.ceil(window_overlap * M))  # Size of segment overlap
+    n_segments = int(np.ceil(((N - R) / (M - dM) + 1)))  # Number of segments
+    weights = np.zeros(N)  # Empty array to store the weights
+    BETA = 0.55  # Target proportion of nonzero elements in the resulting weight vector
+
+    # If input length is smaller than window, compute only one segment of this length
+    if N < M:
+        M, R = N, N
+        n_segments = 1
+
+    # Do computation in small segments
+    for jj in range(n_segments):
+        segment = signal[L:R]
+
+        # Select indices and values to construct the visibility graph
+        if accelerated:
+            # Compute the threshold for the segment
+            threshold = np.quantile(segment, 0.5)
+            # Find local maxima
+            indices_maxima = scipy.signal.find_peaks(segment)[0]
+            segment_maxima = segment[indices_maxima]
+            # Select local maxima above the threshold
+            greater = np.argwhere(segment_maxima > threshold).flatten()
+            indices = indices_maxima[greater]
+            segment = segment_maxima[greater]
+        else:
+            indices = np.arange(len(segment))
 
         # Compute the adjacency matrix to the directed visibility graph
-        A = ts2vg.NaturalVG(directed="top_to_bottom").build(y).adjacency_matrix()
-        _w = np.ones(len(y))
-
-        # Computee the weights for the ecg using its VG transformation
-        while np.count_nonzero(_w) / len(y) >= beta:
-            _w = np.matmul(A, _w) / np.linalg.norm(_w)
+        A = (
+            ts2vg.NaturalVG(directed="top_to_bottom")
+            .build(segment, indices)
+            .adjacency_matrix()
+        )
 
-        # Update the weight vector
+        # Compute the ECG weights using k-Hop-paths
+        size = len(indices)
+        w = np.ones(size) / size
+        while np.count_nonzero(w) / size >= BETA:
+            _w = np.matmul(A, w)
+            _w /= np.linalg.norm(_w)
+            if np.any(np.isnan(_w)):
+                break
+            w = _w
+
+        # Update weight vector by merging overlapping segments
         if L == 0:
-            w[L:R] = _w
-        elif N - deltaM <= L < L < N:
-            w[L:] = 0.5 * (_w + w[L:])
+            weights[L + indices] = w
+        elif N - dM + 1 <= L and L + 1 <= N:
+            weights[L + indices] = 0.5 * (weights[L + indices] + w)
         else:
-            w[L : L + deltaM] = 0.5 * (_w[:deltaM] + w[L : L + deltaM])
-            w[L + deltaM : R] = _w[deltaM:]
+            weights[L + indices[indices <= dM]] = 0.5 * (
+                w[indices <= dM] + weights[L + indices[indices <= dM]]
+            )
+            weights[L + indices[indices > dM]] = w[indices > dM]
+
+        if R - L < M:
+            break
 
-        # Update the boundary conditions
-        L = L + (M - deltaM)
-        if R + (M - deltaM) <= N:
-            R = R + (M - deltaM)
+        # Update segment boundaries
+        L += M - dM
+        if R + (M - dM) <= N:
+            R += M - dM
         else:
             R = N
 
-    # Multiply signal by its weights and apply thresholding algorithm
-    weighted_signal = signal * w
-    rpeaks = _ecg_findpeaks_peakdetect(weighted_signal, sampling_rate)
-    rpeaks = np.array(rpeaks, dtype="int")
+    # Weigh signal with obtained weights and use thresholding algorithm for peak localization
+    weighted_signal = signal * weights
+    rpeaks = _ecg_findpeaks_visgraphthreshold(weighted_signal, sampling_rate)
+    rpeaks = np.array(rpeaks, dtype="int")
 
     return rpeaks
@@ -1336,3 +1387,100 @@ def _ecg_findpeaks_peakdetect(detection, sampling_rate=1000, **kwargs):
                 NPKI = 0.125 * peak_value + 0.875 * NPKI
 
     return signal_peaks
+
+
+def _ecg_findpeaks_visgraphthreshold(weight, sampling_frequency=1000, **kwargs):
+    """Based on the python implementation [3] of the thresholding proposed by Pan and Tompkins in [4]. Modifications
+    were made with respect to the application of R-peak detection using visibility graphs and the k-Hop paths metric.
+
+    References
+    ----------
+    [3] https://github.com/berndporr/py-ecg-detectors
+    [4] J. Pan and W. J. Tompkins, “A Real-Time QRS Detection Algorithm”, IEEE Transactions on Biomedical
+        Engineering, vol. BME-32, Mar. 1985. pp. 230-236.
+ + """ + # initialise variables + N = len(weight) + min_distance = int(0.25 * sampling_frequency) + signal_peaks = [-min_distance] + noise_peaks = [] + + # Learning Phase, 2sec + spki = ( + np.max(weight[0 : 2 * sampling_frequency]) * 0.25 + ) # running estimate of signal level + npki = ( + np.mean(weight[0 : 2 * sampling_frequency]) * 0.5 + ) # running estimate of noise level + threshold_I1 = spki + + # iterate over the whole array / series + for i in range(N): + # skip first and last elements + if 0 < i < N - 1: + # detect peak candidates based on a rising + falling slope + if weight[i - 1] <= weight[i] >= weight[i + 1]: + # peak candidates should be greater than signal threshold + if weight[i] > threshold_I1: + # distance to last peak is greater than minimum detection distance + if (i - signal_peaks[-1]) > 0.3 * sampling_frequency: + signal_peaks.append(i) + spki = 0.125 * weight[signal_peaks[-1]] + 0.875 * spki + # candidate is close to last detected peak -> check if current candidate is better choice + elif 0.3 * sampling_frequency >= (i - signal_peaks[-1]): + # compare slope of last peak with current candidate + if ( + weight[i] > weight[signal_peaks[-1]] + ): # test greater slope -> qrs + spki = ( + spki - 0.125 * weight[signal_peaks[-1]] + ) / 0.875 # reset threshold + signal_peaks[-1] = i + spki = 0.125 * weight[signal_peaks[-1]] + 0.875 * spki + else: + noise_peaks.append(i) + npki = 0.125 * weight[noise_peaks[-1]] + 0.875 * npki + else: + # not a peak -> label as noise and update noise level + npki = 0.125 * weight[i] + 0.875 * npki + + # back search for missed peaks + if len(signal_peaks) > 8: + RR = np.diff(signal_peaks[-9:]) + RR_ave = int(np.mean(RR)) + RR_missed = int(1.66 * RR_ave) + + # if time difference of the last two signal peaks found is too large + if signal_peaks[-1] - signal_peaks[-2] > RR_missed: + threshold_I2 = 0.5 * threshold_I1 + # get range of candidates and apply noise threshold + missed_section_peaks = range( + signal_peaks[-2] + min_distance, + signal_peaks[-1] - min_distance, + ) + missed_section_peaks = [ + p + for p in missed_section_peaks + if weight[p] > threshold_I2 + ] + + # add the largest sample in missed interval to peaks + if len(missed_section_peaks) > 0: + missed_peak = missed_section_peaks[ + np.argmax(weight[missed_section_peaks]) + ] + signal_peaks.append(signal_peaks[-1]) + signal_peaks[-2] = missed_peak + + else: + # not a peak -> label as noise and update noise level + npki = 0.125 * weight[i] + 0.875 * npki + + threshold_I1 = npki + 0.25 * (spki - npki) + + # remove first dummy elements + if signal_peaks[0] == -min_distance: + signal_peaks.pop(0) + + return np.array(signal_peaks) diff --git a/neurokit2/ecg/ecg_peaks.py b/neurokit2/ecg/ecg_peaks.py index ebaf59bf0d..e3d884bcdc 100644 --- a/neurokit2/ecg/ecg_peaks.py +++ b/neurokit2/ecg/ecg_peaks.py @@ -6,14 +6,7 @@ from .ecg_findpeaks import ecg_findpeaks -def ecg_peaks( - ecg_cleaned, - sampling_rate=1000, - method="neurokit", - correct_artifacts=False, - show=False, - **kwargs -): +def ecg_peaks(ecg_cleaned, sampling_rate=1000, method="neurokit", correct_artifacts=False, show=False, **kwargs): """**Find R-peaks in an ECG signal** Find R-peaks in an ECG signal using the specified method. You can pass an unfiltered ECG @@ -40,7 +33,9 @@ def ecg_peaks( * **nabian2018**: Algorithm by Nabian et al. (2018) based on the Pan-Tompkins algorithm. * **rodrigues2021**: Adaptation of the work by Sadhukhan & Mitra (2012) and Gutiérrez-Rivas et al. (2015) by Rodrigues et al. (2021). 
-    * **koka2022**: Algorithm by Koka et al. (2022) based on the visibility graphs.
+    * **emrich2023**: FastNVG algorithm by Emrich et al. (2023), based on the visibility-graph detector of Koka et al. (2022).
+      Provides fast and sample-accurate R-peak detection. The algorithm transforms the ECG into a graph representation
+      and extracts exact R-peak positions using graph metrics.
     * **promac**: ProMAC combines the result of several R-peak detectors in a probabilistic way.
       For a given peak detector, the binary signal representing the peak locations is convolved
       with a Gaussian distribution, resulting in a probabilistic representation of each peak
@@ -153,8 +148,9 @@
       # rodrigues2021
       _, rodrigues2021 = nk.ecg_peaks(ecg, sampling_rate=250, method="rodrigues2021")
 
-      # koka2022
-      _, koka2022 = nk.ecg_peaks(ecg, sampling_rate=250, method="koka2022")
+      # emrich2023
+      cleaned = nk.ecg_clean(ecg, sampling_rate=250, method="emrich2023")
+      _, emrich2023 = nk.ecg_peaks(cleaned, sampling_rate=250, method="emrich2023")
 
       # Collect all R-peak lists by iterating through the result dicts
       rpeaks = [
@@ -171,7 +167,7 @@
              engzeemod2012,
              kalidas2017,
              rodrigues2021,
-              koka2022
+              emrich2023
          ]
       ]
       # Visualize results
@@ -246,20 +242,16 @@
    * Lipponen, J. A., & Tarvainen, M. P. (2019). A robust algorithm for heart rate variability time
      series artefact correction using novel beat classification. Journal of medical engineering &
      technology, 43(3), 173-181.
+    * Emrich, J., Koka, T., Wirth, S., & Muma, M. (2023). Accelerated Sample-Accurate R-Peak
+      Detectors Based on Visibility Graphs. 31st European Signal Processing Conference
+      (EUSIPCO), 1090-1094, doi: 10.23919/EUSIPCO58844.2023.10290007
 
     """
     # Store info
     info = {"method_peaks": method.lower(), "method_fixpeaks": "None"}
 
     # First peak detection
-    info.update(
-        ecg_findpeaks(
-            ecg_cleaned,
-            sampling_rate=sampling_rate,
-            method=info["method_peaks"],
-            **kwargs
-        )
-    )
+    info.update(ecg_findpeaks(ecg_cleaned, sampling_rate=sampling_rate, method=info["method_peaks"], **kwargs))
 
     # Peak correction
     if correct_artifacts:
@@ -422,10 +414,7 @@
     if len(raw) == 0:
         return "No bad peaks"
     if any([i < len(signal) for i in raw]):
-        return (
-            "Peak indices longer than signal. Signals might have been cropped. "
-            + "Better skip plotting."
-        )
+        return "Peak indices longer than signal. Signals might have been cropped. Better skip plotting."
extra = [i for i in raw if i not in peaks] if len(extra) > 0: diff --git a/neurokit2/eda/eda_analyze.py b/neurokit2/eda/eda_analyze.py index 3ef74eb508..38b87e8244 100644 --- a/neurokit2/eda/eda_analyze.py +++ b/neurokit2/eda/eda_analyze.py @@ -97,7 +97,7 @@ def eda_analyze(data, sampling_rate=1000, method="auto"): # Interval-related analysis elif method in ["interval-related", "interval", "resting-state"]: - features = eda_intervalrelated(data) + features = eda_intervalrelated(data, sampling_rate=sampling_rate) # Auto elif method in ["auto"]: @@ -106,7 +106,7 @@ def eda_analyze(data, sampling_rate=1000, method="auto"): for i in data: duration = len(data[i]) / sampling_rate if duration >= 10: - features = eda_intervalrelated(data) + features = eda_intervalrelated(data, sampling_rate=sampling_rate) else: features = eda_eventrelated(data) @@ -117,7 +117,7 @@ def eda_analyze(data, sampling_rate=1000, method="auto"): else: duration = len(data) / sampling_rate if duration >= 10: - features = eda_intervalrelated(data) + features = eda_intervalrelated(data, sampling_rate=sampling_rate) else: features = eda_eventrelated(data) diff --git a/neurokit2/eda/eda_phasic.py b/neurokit2/eda/eda_phasic.py index 3e6b9ff9d7..f19361a676 100644 --- a/neurokit2/eda/eda_phasic.py +++ b/neurokit2/eda/eda_phasic.py @@ -357,16 +357,16 @@ def _eda_phasic_sparsEDA( if np.sum(np.isnan(eda_signal)) > 0: raise AssertionError("Signal contains NaN") - # Preprocessing - signalAdd = np.zeros(len(eda_signal) + (20 * sampling_rate) + (60 * sampling_rate)) - signalAdd[0 : 20 * sampling_rate] = eda_signal[0] - signalAdd[20 * sampling_rate : 20 * sampling_rate + len(eda_signal)] = eda_signal - signalAdd[20 * sampling_rate + len(eda_signal) :] = eda_signal[-1] - # Resample to 8 Hz eda_signal = signal_resample(eda_signal, sampling_rate=sampling_rate, desired_sampling_rate=8) new_sr = 8 + # Preprocessing + signalAdd = np.zeros(len(eda_signal) + (20 * new_sr) + (60 * new_sr)) + signalAdd[0 : 20 * new_sr] = eda_signal[0] + signalAdd[20 * new_sr : 20 * new_sr + len(eda_signal)] = eda_signal + signalAdd[20 * new_sr + len(eda_signal) :] = eda_signal[-1] + Nss = len(eda_signal) Ns = len(signalAdd) b0 = 0 @@ -428,7 +428,7 @@ def _eda_phasic_sparsEDA( b0 = signalCut[0] signalCutIn = signalCut - b0 - beta, _, _, _, _, _ = lasso(R, signalCutIn, sampling_rate, Kmax, epsilon) + beta, _, _, _, _, _ = lasso(R, signalCutIn, new_sr, Kmax, epsilon) signalEst = (np.matmul(R, beta) + b0).reshape(-1) @@ -437,8 +437,8 @@ def _eda_phasic_sparsEDA( # res3 = sum(remAout(40*sampling_rate+1:(60*sampling_rate))); remAout = (signalCut - signalEst) ** 2 - res2 = np.sum(remAout[20 * sampling_rate : 40 * sampling_rate]) - res3 = np.sum(remAout[40 * sampling_rate : 60 * sampling_rate]) + res2 = np.sum(remAout[20 * new_sr : 40 * new_sr]) + res3 = np.sum(remAout[40 * new_sr : 60 * new_sr]) jump = 1 if res2 < 1: @@ -454,19 +454,19 @@ def _eda_phasic_sparsEDA( SCRaux[:] = SCRline.reshape([5, Lreg]).transpose() driver = SCRaux.sum(axis=1) - b0 = np.matmul(R[jump * 20 * sampling_rate - 1, 0:6], beta[0:6, :]) + b0 + b0 = np.matmul(R[jump * 20 * new_sr - 1, 0:6], beta[0:6, :]) + b0 - driverAux[cutS : cutS + (jump * 20 * sampling_rate)] = driver[0 : jump * sampling_rate * 20] - slcAux[cutS : cutS + (jump * 20 * sampling_rate)] = SCL[ - 0 : jump * sampling_rate * 20 + driverAux[cutS : cutS + (jump * 20 * new_sr)] = driver[0 : jump * new_sr * 20] + slcAux[cutS : cutS + (jump * 20 * new_sr)] = SCL[ + 0 : jump * new_sr * 20 ].reshape(-1) - resAux[cutS : cutS + (jump * 20 * 
sampling_rate)] = remAout[0 : jump * sampling_rate * 20] - cutS = cutS + jump * 20 * sampling_rate + resAux[cutS : cutS + (jump * 20 * new_sr)] = remAout[0 : jump * new_sr * 20] + cutS = cutS + jump * 20 * new_sr cutE = cutS + N SCRaux = driverAux[pointerS:pointerE] SCL = slcAux[pointerS:pointerE] - MSE = resAux[pointerS:pointerE] + #MSE = resAux[pointerS:pointerE] # PP ind = np.argwhere(SCRaux > 0).reshape(-1) @@ -488,11 +488,11 @@ def _eda_phasic_sparsEDA( threshold = rho * scr_max driver[driver < threshold] = 0 - # Resample + # Resample back to original sampling rate + SCR = eda_signal-SCL + SCR = signal_resample(SCR, desired_length=original_length) SCL = signal_resample(SCL, desired_length=original_length) - MSE = signal_resample(MSE, desired_length=original_length) - - return driver, SCL, MSE + return SCL, SCR def lasso(R, s, sampling_rate, maxIters, epsilon): diff --git a/neurokit2/emg/emg_activation.py b/neurokit2/emg/emg_activation.py index 9de24821f5..752d3cb5f5 100644 --- a/neurokit2/emg/emg_activation.py +++ b/neurokit2/emg/emg_activation.py @@ -231,16 +231,16 @@ def emg_activation( # Modify output produced by signal_formatpeaks. for x in range(len(emg_amplitude)): - if df_activity["EMG_Activity"][x] != 0: + if df_activity.loc[x, "EMG_Activity"] != 0: if df_activity.index[x] == df_activity.index.get_loc(x): - df_activity["EMG_Activity"][x] = 1 + df_activity.loc[x, "EMG_Activity"] = 1 else: - df_activity["EMG_Activity"][x] = 0 - if df_offsets["EMG_Offsets"][x] != 0: + df_activity.loc[x, "EMG_Activity"] = 0 + if df_offsets.loc[x, "EMG_Offsets"] != 0: if df_offsets.index[x] == df_offsets.index.get_loc(x): - df_offsets["EMG_Offsets"][x] = 1 + df_offsets.loc[x, "EMG_Offsets"] = 1 else: - df_offsets["EMG_Offsets"][x] = 0 + df_offsets.loc[x, "EMG_Offsets"] = 0 activity_signal = pd.concat([df_activity, df_onsets, df_offsets], axis=1) diff --git a/neurokit2/epochs/epochs_create.py b/neurokit2/epochs/epochs_create.py index bf34d8cded..29c40320c0 100644 --- a/neurokit2/epochs/epochs_create.py +++ b/neurokit2/epochs/epochs_create.py @@ -113,6 +113,15 @@ def epochs_create( # Chunk into n blocks of 1 second epochs = nk.epochs_create(data, sampling_rate=100, epochs_end=1) + * **Example 5**: Epochs with list of starting points + + .. 
ipython:: python + + epochs = nk.epochs_create(data, events, sampling_rate=100, + epochs_start=[0, -1, -1, 0], + epochs_end=[1, 0, 0, 1]) + [len(epochs[i]) for i in epochs.keys()] + """ # Santize data input @@ -129,9 +138,7 @@ def epochs_create( if isinstance(events, int): events = np.linspace(0, len(data), events + 2)[1:-1] if isinstance(events, dict) is False: - events = _events_find_label( - {"onset": events}, event_labels=event_labels, event_conditions=event_conditions - ) + events = _events_find_label({"onset": events}, event_labels=event_labels, event_conditions=event_conditions) event_onsets = list(events["onset"]) event_labels = list(events["label"]) @@ -160,9 +167,7 @@ def epochs_create( length_buffer = epoch_max_duration # First createa buffer of the same dtype as data and fill with it 0s - buffer = pd.DataFrame(0, index=range(length_buffer), columns=data.columns).astype( - dtype=data.dtypes - ) + buffer = pd.DataFrame(0, index=range(length_buffer), columns=data.columns).astype(dtype=data.dtypes) # Only then, we convert the non-integers to nans (because regular numpy's ints cannot be nan) buffer.select_dtypes(exclude="int64").replace({0.0: np.nan}, inplace=True) # Now we can combine the buffer with the data @@ -173,7 +178,6 @@ def epochs_create( epochs = {} for i, label in enumerate(parameters["label"]): - # Find indices start = parameters["onset"][i] + (parameters["start"][i] * sampling_rate) end = parameters["onset"][i] + (parameters["end"][i] * sampling_rate) @@ -201,9 +205,7 @@ def epochs_create( # Sanitize dtype of individual columns for i in epochs: - for colname, column in epochs[i].select_dtypes(include=["object"]).items(): - # Check whether columns are indices or label/condition values = column.unique().tolist() zero_or_one = not (False in [x in [0, 1] for x in values]) diff --git a/neurokit2/epochs/epochs_plot.py b/neurokit2/epochs/epochs_plot.py index 40a5c615bc..4876909c0e 100644 --- a/neurokit2/epochs/epochs_plot.py +++ b/neurokit2/epochs/epochs_plot.py @@ -4,7 +4,7 @@ from .epochs_to_df import epochs_to_df -def epochs_plot(epochs, legend=True, **kwargs): +def epochs_plot(epochs, legend=True, columns="all", **kwargs): """**Epochs visualization** Plot epochs. @@ -15,6 +15,8 @@ def epochs_plot(epochs, legend=True, **kwargs): A dict containing one DataFrame per event/trial. Usually obtained via `epochs_create()`. legend : bool Display the legend (the key of each epoch). + columns : str or list + Which columns to plot. If 'all', plot all columns. If a list, plot only the columns in the list. **kwargs Other arguments to pass (not used for now). 
@@ -34,10 +36,10 @@
       events = nk.events_find(data["Photosensor"], threshold_keep='below',
                               event_conditions=["Negative", "Neutral", "Neutral", "Negative"])
 
-      epochs = nk.epochs_create(data, events, sampling_rate=100, epochs_end=1)
+      epochs = nk.epochs_create(data, events, sampling_rate=100, epochs_end=7)
 
       @savefig p_epochs_plot1.png scale=100%
-      nk.epochs_plot(epochs)
+      nk.epochs_plot(epochs, columns=["EDA", "RSP"])
       @suppress
       plt.close()
@@ -79,6 +81,10 @@
     cols = data.columns.values
     cols = [x for x in cols if x not in ["Time", "Condition", "Label", "Index"]]
+    if columns != "all":
+        if isinstance(columns, str):
+            columns = [columns]
+        cols = [x for x in cols if x in columns]
 
     if len(cols) == 1:
         fig, ax = plt.subplots()
@@ -98,14 +104,28 @@
     """
     data = epochs.to_data_frame()
-    data = data.rename(columns={"time": "Time", "condition": "Condition", "epoch": "Label"})
+    data = data.rename(
+        columns={"time": "Time", "condition": "Condition", "epoch": "Label"}
+    )
     data["Time"] = data["Time"] / 1000  # ms to seconds
 
     if isinstance(what, str):
-        data = data[[x for x in data.columns.values if x in ["Time", "Condition", "Label", what]]]
+        data = data[
+            [
+                x
+                for x in data.columns.values
+                if x in ["Time", "Condition", "Label", what]
+            ]
+        ]
     # Select a few specified channels
     elif isinstance(what, list):
-        data = data[[x for x in data.columns.values if x in ["Time", "Condition", "Label"] + what]]
+        data = data[
+            [
+                x
+                for x in data.columns.values
+                if x in ["Time", "Condition", "Label"] + what
+            ]
+        ]
 
     return data
@@ -116,14 +136,25 @@
     grouped = data.groupby("Condition")
 
     # Colors
-    color_list = ["red", "blue", "green", "yellow", "purple", "orange", "cyan", "magenta"]
+    color_list = [
+        "red",
+        "blue",
+        "green",
+        "yellow",
+        "purple",
+        "orange",
+        "cyan",
+        "magenta",
+    ]
     colors = {}
     for i, cond in enumerate(set(data["Condition"])):
         colors[cond] = color_list[i]
 
     # Plot
     for key, group in grouped:
-        df = group.pivot_table(index="Time", columns=["Condition", "Label"], values=col)
+        df = group.pivot_table(
+            index="Time", columns=["Condition", "Label"], values=col
+        )
         df.plot(ax=ax, label=col, title=col, style=colors[key], legend=legend)
 
     # TODO: Custom legend
diff --git a/neurokit2/events/events_find.py b/neurokit2/events/events_find.py
index da94c98060..9e5c207a5f 100644
--- a/neurokit2/events/events_find.py
+++ b/neurokit2/events/events_find.py
@@ -1,8 +1,10 @@
 # -*- coding: utf-8 -*-
 import itertools
+
 from warnings import warn
 
 import numpy as np
+import pandas as pd
 
 from ..misc import NeuroKitWarning
 from ..signal import signal_binarize
@@ -28,11 +30,13 @@
     Parameters
     ----------
-    event_channel : array or list
-        The channel containing the events.
+    event_channel : array or list or DataFrame
+        The channel containing the events. If multiple channels are entered, the channels are
+        handled as reflecting different events, and a new combined channel is created from them.
     threshold : str or float
         The threshold value by which to select the events. If ``"auto"``, takes the value between
-        the max and the min.
+        the max and the min. If ``"auto"`` is used with multi-channel inputs, the default value
+        of 0.9 is used to capture all events.
     threshold_keep : str
         ``"above"`` or ``"below"``, define the events as above or under the threshold. For
         photosensors, a white screen corresponds usually to higher values. Therefore, if your
@@ -60,14 +64,16 @@
         A list containing unique event identifiers. If ``None``, will use the event index number.
     event_conditions : list
         An optional list containing, for each event, for example the trial category, group or
-        experimental conditions.
+        experimental conditions. This option is ignored when multiple channels are supplied, as
+        the function generates these automatically.
 
     Returns
     ----------
     dict
-        Dict containing 3 or 4 arrays, ``"onset"`` for event onsets, ``"duration"`` for event
-        durations, ``"label"`` for the event identifiers and the optional ``"conditions"`` passed
-        to ``event_conditions``.
+        Dict containing 3 to 5 arrays, ``"onset"`` for event onsets, ``"duration"`` for event
+        durations, ``"label"`` for the event identifiers, the optional ``"condition"`` passed
+        to ``event_conditions`` and the ``events_channel`` if multiple channels were supplied
+        to the function.
 
     See Also
     --------
@@ -99,14 +105,41 @@
 
     .. ipython:: python
 
-      events = nk.events_find(signal, duration_min= 10)
+      events = nk.events_find(signal, duration_min=10)
 
       @savefig p_events_find2.png scale=100%
       nk.events_plot(events, signal)
       @suppress
       plt.close()
 
+    Combine multiple digital signals into a single events channel and compute its events.
+    The higher the channel index, the higher the bit it represents in the combined channel.
+
+    .. ipython:: python
+
+      signal2 = np.zeros(200)
+      signal2[65:80] = 1
+      signal2[110:125] = 1
+      signal2[175:190] = 1
+
+      @savefig p_events_find3.png scale=100%
+      nk.signal_plot([signal, signal2])
+      @suppress
+      plt.close()
+
+      events = nk.events_find([signal, signal2])
+      events
+      @savefig p_events_find4.png scale=100%
+      nk.events_plot(events, events["events_channel"])
+      @suppress
+      plt.close()
+
+    Convert the event condition values to their human-readable representation.
+
+    .. ipython:: python
+
+      value_to_condition = {1: "Stimulus 1", 2: "Stimulus 2", 3: "Stimulus 3"}
+      events["condition"] = [value_to_condition[id] for id in events["condition"]]
+      events
 
     """
     events = _events_find(
@@ -202,7 +235,7 @@
         events["label"] = event_labels
 
     # Condition
-    if event_conditions is not None:
+    if event_conditions is not None and "condition" not in events:
         if len(event_conditions) != n:
             raise ValueError(
                 "NeuroKit error: "
@@ -218,7 +251,15 @@
 
 def _events_find(event_channel, threshold="auto", threshold_keep="above"):
-    binary = signal_binarize(event_channel, threshold=threshold)
+    events_channel = _events_generate_events_channel(event_channel)
+
+    # Differing setup based on multi-channel input or single channel input
+    if events_channel is not None:
+        if threshold == "auto":
+            threshold = 0.9
+        binary = signal_binarize(events_channel, threshold=threshold)
+    else:
+        binary = signal_binarize(event_channel, threshold=threshold)
 
     if threshold_keep not in ["above", "below"]:
         raise ValueError(
@@ -231,15 +272,57 @@
 
     # Initialize data
     events = {"onset": [], "duration": []}
+    if events_channel is not None:
+        events["events_channel"] = events_channel
+        events["condition"] = []
+
     index = 0
     for event, group in itertools.groupby(binary):
         duration = len(list(group))
         if event == 1:
             events["onset"].append(index)
             events["duration"].append(duration)
+
+            if events_channel is not None:
+                events["condition"].append(int(events["events_channel"][index]))
         index += duration
 
     # Convert to array
     events["onset"] = np.array(events["onset"])
     events["duration"] = np.array(events["duration"])
     return events
+
+
+def _events_generate_events_channel(event_channels):
+    # Check if nested list / array
+    is_nested_list = isinstance(event_channels, (list, np.ndarray)) and (
+        len(event_channels) > 1 and isinstance(event_channels[0], (list, np.ndarray))
+    )
+
+    # Check if dataframe
+    is_dataframe = (
+        isinstance(event_channels, pd.DataFrame) and len(event_channels.columns) > 1
+    )
+
+    # If neither, return None and continue
+    if not is_nested_list and not is_dataframe:
+        return None
+
+    stim_channel = None
+
+    # Create stim events array
+    if is_dataframe:
+        stim_channel = np.zeros(event_channels.shape[0])
+
+        # Add channels based on order and multiply by power of 2
+        for i, column in enumerate(event_channels):
+            peak_value = np.max(event_channels[column])
+            stim_channel += np.floor(event_channels[column] / peak_value) * 2**i
+
+    elif is_nested_list:
+        stim_channel = np.zeros(len(event_channels[0]))
+        for i, channel in enumerate(event_channels):
+            peak_value = np.max(channel)
+            stim_channel += np.floor(channel / peak_value) * 2**i
+
+    return stim_channel
diff --git a/neurokit2/misc/__init__.py b/neurokit2/misc/__init__.py
index 0a49dfcf9a..6abdcc2701 100644
--- a/neurokit2/misc/__init__.py
+++ b/neurokit2/misc/__init__.py
@@ -5,7 +5,7 @@
 """
 
 from ._warnings import NeuroKitWarning
-from .random import check_random_state, check_random_state_children, spawn_random_state
+from .check_random_state import check_random_state, check_random_state_children, spawn_random_state
 from .check_type import check_type
 from .copyfunction import copyfunction
 from .expspace import expspace
@@ -21,6 +21,7 @@
 from .replace import replace
 from .type_converters import as_vector
 from .report import create_report
+from .fig2img import fig2img
 
 
 __all__ = [
@@ -43,4 +44,5 @@
     "check_random_state_children",
     "spawn_random_state",
"create_report", + "fig2img", ] diff --git a/neurokit2/misc/random.py b/neurokit2/misc/check_random_state.py similarity index 95% rename from neurokit2/misc/random.py rename to neurokit2/misc/check_random_state.py index 98fc9b85bb..50740619eb 100644 --- a/neurokit2/misc/random.py +++ b/neurokit2/misc/check_random_state.py @@ -25,6 +25,7 @@ def check_random_state(seed=None): rng: numpy.random.Generator or numpy.random.RandomState Random number generator. """ + # If seed is an integer, use the legacy RandomState class if isinstance(seed, numbers.Integral): return np.random.RandomState(seed) @@ -77,7 +78,10 @@ def spawn_random_state(rng, n_children=1): if rng._bit_generator._seed_seq is not None: rng_class = type(rng) bit_generator_class = type(rng._bit_generator) - return [rng_class(bit_generator_class(seed=s)) for s in rng._bit_generator._seed_seq.spawn(n_children)] + return [ + rng_class(bit_generator_class(seed=s)) + for s in rng._bit_generator._seed_seq.spawn(n_children) + ] except TypeError: # The rng does not support spawning through SeedSequence, see below pass @@ -94,7 +98,9 @@ def spawn_random_state(rng, n_children=1): return [np.random.RandomState(seed=s) for s in temp_rng.random_raw(n_children)] -def check_random_state_children(random_state_parent, random_state_children, n_children=1): +def check_random_state_children( + random_state_parent, random_state_children, n_children=1 +): """**Create new independent children random number generators to be used in sub-functions** Parameters diff --git a/neurokit2/misc/fig2img.py b/neurokit2/misc/fig2img.py new file mode 100644 index 0000000000..cda2ed45fd --- /dev/null +++ b/neurokit2/misc/fig2img.py @@ -0,0 +1,46 @@ +import io + + +def fig2img(fig): + """Matplotlib Figure to PIL Image + + Convert a Matplotlib figure to a PIL Image + + Parameters + ---------- + fig : plt.figure + Matplotlib figure. + + Returns + ---------- + list + The rescaled values. + + + Examples + ---------- + .. ipython:: python + + import neurokit2 as nk + import matplotlib.pyplot as plt + + plt.plot([1, 2, 3, 4, 5]) # Make plot + fig = plt.gcf() # Get current figure + nk.fig2img(fig) # Convert to PIL Image + plt.close(fig) # Close figure + + """ + + try: + import PIL.Image + except ImportError as e: + raise ImportError( + "fig2img(): the 'PIL' (Pillow) module is required for this function to run. ", + "Please install it first (`pip install pillow`).", + ) from e + + buffer = io.BytesIO() + fig.savefig(buffer) + buffer.seek(0) + img = PIL.Image.open(buffer) + return img diff --git a/neurokit2/rsp/rsp_clean.py b/neurokit2/rsp/rsp_clean.py index 0289fa99c9..34966e6a42 100644 --- a/neurokit2/rsp/rsp_clean.py +++ b/neurokit2/rsp/rsp_clean.py @@ -15,8 +15,8 @@ def rsp_clean(rsp_signal, sampling_rate=1000, method="khodadad2018", **kwargs): Clean a respiration signal using different sets of parameters, such as: - * **khodadad2018**: Linear detrending followed by a fifth order 2Hz low-pass IIR Butterworth - filter) + * **khodadad2018**: Second order 0.05-3 Hz bandpass Butterworth filter. Note that the + implementation differs from the referenced paper (see issue #950). * **BioSPPy**: Second order 0.1-0.35 Hz bandpass Butterworth filter followed by a constant detrending). 
* **hampel**: Applies a median-based Hampel filter by replacing values which are 3 (can be diff --git a/neurokit2/rsp/rsp_methods.py b/neurokit2/rsp/rsp_methods.py index 81f4a48972..decd3860d6 100644 --- a/neurokit2/rsp/rsp_methods.py +++ b/neurokit2/rsp/rsp_methods.py @@ -106,14 +106,7 @@ def rsp_methods( report_info["text_cleaning"] = f"The raw signal, sampled at {sampling_rate} Hz," if method_cleaning in ["khodadad", "khodadad2018"]: report_info["text_cleaning"] += ( - " linear detrending followed by a fifth order 2Hz low-pass IIR Butterworth filter; " - + "following Khoadadad et al., 2018." - ) - - refs.append( - """Khodadad, D., Nordebo, S., Müller, B., Waldmann, A., Yerworth, R., Becher, T.,... & Bayford, R. (2018). - Optimized breath detection algorithm in electrical impedance tomography. - Physiological measurement, 39(9), 094001.""" + " was preprocessed using a second order 0.05-3 Hz bandpass Butterworth filter." ) elif method_cleaning in ["hampel", "power", "power2020"]: report_info["text_cleaning"] += ( @@ -136,10 +129,10 @@ def rsp_methods( elif method_cleaning in ["none"]: report_info[ "text_cleaning" - ] += "was directly used for peak detection without preprocessing." + ] += " was directly used for peak detection without preprocessing." else: # just in case more methods are added - report_info["text_cleaning"] += f"was cleaned following the {method} method." + report_info["text_cleaning"] += f" was cleaned following the {method} method." # 2. Peaks # ---------- diff --git a/neurokit2/rsp/rsp_rate.py b/neurokit2/rsp/rsp_rate.py index 19f032e473..b84b7c7397 100644 --- a/neurokit2/rsp/rsp_rate.py +++ b/neurokit2/rsp/rsp_rate.py @@ -125,7 +125,7 @@ def _rsp_rate_xcorr( # Find xcorr for all frequencies with diff xcorr = [] t = np.linspace(0, window, len(diff)) - for frequency in np.arange(5 / 60, 30.25 / 60, 0.25 / 50): + for frequency in np.arange(5 / 60, 30.25 / 60, 0.25 / 60): # Define the sin waves sin_wave = np.sin(2 * np.pi * frequency * t) # Calculate cross-correlation diff --git a/neurokit2/signal/signal_resample.py b/neurokit2/signal/signal_resample.py index 92b7e229cb..470fefcb4e 100644 --- a/neurokit2/signal/signal_resample.py +++ b/neurokit2/signal/signal_resample.py @@ -157,11 +157,11 @@ def _resample_poly(signal, desired_length): def _resample_pandas(signal, desired_length): # Convert to Time Series - index = pd.date_range("20131212", freq="L", periods=len(signal)) + index = pd.date_range("20131212", freq="ms", periods=len(signal)) resampled_signal = pd.Series(signal, index=index) # Create resampling factor - resampling_factor = str(np.round(1 / (desired_length / len(signal)), 6)) + "L" + resampling_factor = str(np.round(1 / (desired_length / len(signal)), 6)) + "ms" # Resample resampled_signal = resampled_signal.resample(resampling_factor).bfill().values diff --git a/pyproject.toml b/pyproject.toml index 3a28c0f379..c41e4ba277 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [tool.poetry] name = "neurokit2" -# version = "0.1.0" # Needs to find a way to automatically get latest version +version = "0.2.8" # Needs to find a way to automatically get latest version description = "The Python Toolbox for Neurophysiological Signal Processing." 
authors = [ "Dominique Makowski <@DominiqueMakowski>", @@ -24,7 +24,14 @@ keywords = ['python', 'neurokit', 'neurokit2', 'physiological', 'signal'] [tool.poetry.dependencies] -python = "*" +python = "^3.11" +scipy = "^1.11" +scikit-learn = "^1.4" +pandas = "^2.1" +matplotlib = "*" +requests = "^2.31.0" +cvxopt = "^1.3" +biosppy = "*" [tool.poetry.dev-dependencies] pytest = "^3.4" \ No newline at end of file diff --git a/setup.py b/setup.py index c943f1d5fc..d65df7cb84 100644 --- a/setup.py +++ b/setup.py @@ -13,9 +13,7 @@ with open("NEWS.rst") as history_file: history = history_file.read() -history = history.replace("\n-------------------", "\n^^^^^^^^^^^^^^^^^^^").replace( - "\n=====", "\n-----" -) +history = history.replace("\n-------------------", "\n^^^^^^^^^^^^^^^^^^^").replace("\n=====", "\n-----") def find_version(): @@ -27,7 +25,7 @@ def find_version(): # Dependencies -requirements = ["numpy", "pandas", "scipy", "scikit-learn>=1.0.0", "matplotlib"] +requirements = ["requests", "numpy", "pandas", "scipy", "scikit-learn>=1.0.0", "matplotlib"] # Optional Dependencies (only needed / downloaded for testing purposes, for instance to test against some other packages) setup_requirements = ["pytest-runner", "numpy"] @@ -35,7 +33,7 @@ def find_version(): "pytest", "coverage", "bioread", - "mne[data]", + "mne", "pyentrp", "antropy", "EntropyHub", @@ -62,7 +60,7 @@ def find_version(): license="MIT license", # The name and contact of a maintainer author="Dominique Makowski", - author_email="dom.makowski@gmail.com", + author_email="D.Makowski@sussex.ac.uk", # Dependencies install_requires=requirements, setup_requires=setup_requirements, diff --git a/tests/ecg_data/expected_ecg_findpeaks_hamilton_bad_500.csv b/tests/ecg_data/expected_ecg_findpeaks_hamilton_bad_500.csv new file mode 100644 index 0000000000..67d22ccf3c --- /dev/null +++ b/tests/ecg_data/expected_ecg_findpeaks_hamilton_bad_500.csv @@ -0,0 +1,58 @@ +156 +354 +597 +848 +1016 +1535 +2134 +2740 +3296 +3884 +4419 +4925 +5499 +6072 +6570 +7108 +7621 +8117 +8556 +8956 +9573 +10084 +10593 +11193 +11734 +12261 +12880 +13454 +13975 +14567 +15152 +15659 +16240 +16784 +17262 +17781 +18360 +18850 +19453 +20090 +20542 +21038 +21661 +22148 +22581 +23129 +23611 +24030 +24655 +25278 +25796 +26393 +26828 +27340 +27958 +28530 +29144 +29665 diff --git a/tests/ecg_data/expected_ecg_findpeaks_hamilton_good_4000.csv b/tests/ecg_data/expected_ecg_findpeaks_hamilton_good_4000.csv new file mode 100644 index 0000000000..f90f37405b --- /dev/null +++ b/tests/ecg_data/expected_ecg_findpeaks_hamilton_good_4000.csv @@ -0,0 +1,26 @@ +1522 +3074 +4275 +5627 +6910 +8311 +11180 +14214 +17421 +20736 +24033 +27176 +30309 +31627 +33252 +34544 +36168 +37369 +38862 +40093 +41561 +42886 +44564 +45879 +47834 +49747 diff --git a/tests/tests_ecg_findpeaks.py b/tests/tests_ecg_findpeaks.py index f56174e692..c2ca3f4a1c 100644 --- a/tests/tests_ecg_findpeaks.py +++ b/tests/tests_ecg_findpeaks.py @@ -9,18 +9,25 @@ # Using neurokit2.ecg.ecg_findpeaks._ecg_findpeaks_MWA doesn't # work because of the "from .ecg_findpeaks import ecg_findpeaks" # statement in neurokit2/ecg/__init.__.py. 
diff --git a/tests/tests_ecg_findpeaks.py b/tests/tests_ecg_findpeaks.py
index f56174e692..c2ca3f4a1c 100644
--- a/tests/tests_ecg_findpeaks.py
+++ b/tests/tests_ecg_findpeaks.py
@@ -9,18 +9,25 @@
 # Using neurokit2.ecg.ecg_findpeaks._ecg_findpeaks_MWA doesn't
 # work because of the "from .ecg_findpeaks import ecg_findpeaks"
 # statement in neurokit2/ecg/__init__.py.
-from neurokit2.ecg.ecg_findpeaks import _ecg_findpeaks_MWA, _ecg_findpeaks_peakdetect
+from neurokit2.ecg.ecg_findpeaks import (
+    _ecg_findpeaks_MWA,
+    _ecg_findpeaks_peakdetect,
+    _ecg_findpeaks_hamilton,
+)


 def _read_csv_column(csv_name, column):
-    csv_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "ecg_data", csv_name)
+    csv_path = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "ecg_data", csv_name
+    )
     csv_data = pd.read_csv(csv_path, header=None)
     return csv_data[column].to_numpy()


 def test_ecg_findpeaks_MWA():
     np.testing.assert_array_equal(
-        _ecg_findpeaks_MWA(np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=float), 3), [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]
+        _ecg_findpeaks_MWA(np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=float), 3),
+        [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8],
     )


@@ -37,9 +44,36 @@
 # new bugs into the function.
 def test_ecg_findpeaks_peakdetect():
     good_4000 = _read_csv_column("good_4000.csv", 1)
-    expected_good_4000_peaks = _read_csv_column("expected_ecg_findpeaks_peakdetect_good_4000.csv", 0)
-    np.testing.assert_array_equal(_ecg_findpeaks_peakdetect(good_4000, sampling_rate=4000), expected_good_4000_peaks)
+    expected_good_4000_peaks = _read_csv_column(
+        "expected_ecg_findpeaks_peakdetect_good_4000.csv", 0
+    )
+    np.testing.assert_array_equal(
+        _ecg_findpeaks_peakdetect(good_4000, sampling_rate=4000),
+        expected_good_4000_peaks,
+    )
+
+    bad_500 = _read_csv_column("bad_500.csv", 1)
+    expected_bad_500_peaks = _read_csv_column(
+        "expected_ecg_findpeaks_peakdetect_bad_500.csv", 0
+    )
+    np.testing.assert_array_equal(
+        _ecg_findpeaks_peakdetect(bad_500, sampling_rate=500), expected_bad_500_peaks
+    )
+
+
+def test_ecg_findpeaks_hamilton():
+    good_4000 = _read_csv_column("good_4000.csv", 1)
+    expected_good_4000_peaks = _read_csv_column(
+        "expected_ecg_findpeaks_hamilton_good_4000.csv", 0
+    )
+    np.testing.assert_array_equal(
+        _ecg_findpeaks_hamilton(good_4000, sampling_rate=4000), expected_good_4000_peaks
+    )

     bad_500 = _read_csv_column("bad_500.csv", 1)
-    expected_bad_500_peaks = _read_csv_column("expected_ecg_findpeaks_peakdetect_bad_500.csv", 0)
-    np.testing.assert_array_equal(_ecg_findpeaks_peakdetect(bad_500, sampling_rate=500), expected_bad_500_peaks)
+    expected_bad_500_peaks = _read_csv_column(
+        "expected_ecg_findpeaks_hamilton_bad_500.csv", 0
+    )
+    np.testing.assert_array_equal(
+        _ecg_findpeaks_hamilton(bad_500, sampling_rate=500), expected_bad_500_peaks
+    )
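The test_ecg_findpeaks_MWA assertion above encodes the moving-window-average semantics: each output sample is the mean of the current value and up to window - 1 preceding values, with an expanding window at the head of the signal. A reimplementation sketch of that assumed behaviour (illustrative only, not the library code):

    import numpy as np

    def mwa(signal, window):
        # Causal moving average with an expanding window at the start,
        # consistent with the values asserted in test_ecg_findpeaks_MWA.
        out = np.empty(len(signal))
        for i in range(len(signal)):
            out[i] = signal[max(0, i - window + 1) : i + 1].mean()
        return out

    assert np.allclose(
        mwa(np.arange(10, dtype=float), 3),
        [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8],
    )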
diff --git a/tests/tests_eda.py b/tests/tests_eda.py
index 028dc43cf9..fe07efd746 100644
--- a/tests/tests_eda.py
+++ b/tests/tests_eda.py
@@ -81,9 +81,8 @@ def test_eda_phasic():
     highpass = nk.eda_phasic(eda, sampling_rate=sr, method="highpass")
     assert len(highpass) == len(eda)

-    # This fails unfortunately... need to fix the sparsEDA algorithm
-    # sparsEDA = nk.eda_phasic(eda, sampling_rate=sr, method="sparsEDA")
-    # assert len(highpass) == len(eda)
+    sparsEDA = nk.eda_phasic(eda, sampling_rate=sr, method="sparsEDA")
+    assert len(sparsEDA) == len(eda)


 def test_eda_peaks():
diff --git a/tests/tests_events.py b/tests/tests_events.py
index f2499c93d9..5bc8059906 100644
--- a/tests/tests_events.py
+++ b/tests/tests_events.py
@@ -1,5 +1,6 @@
 import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd
 import pytest

 import neurokit2 as nk
@@ -65,3 +66,44 @@ def test_events_plot():
     assert len(labels) == len(handles)

     plt.close(fig)
+
+
+def test_stim_events_find():
+
+    channel1 = np.zeros(200)
+    channel1[10:30] = 1
+    channel1[60:80] = 1
+    channel1[150:170] = 1
+
+    channel2 = np.zeros(200)
+    channel2[60:80] = 1
+    channel2[150:170] = 1
+
+    channel3 = np.zeros(200)
+    channel3[150:170] = 1
+
+    # test list of channels input
+    stim_events = nk.events_find([channel1, channel2, channel3])
+    assert list(stim_events["onset"]) == [10, 60, 150]
+    assert list(stim_events["duration"]) == [20, 20, 20]
+    assert stim_events["condition"] == [1, 3, 7]
+
+    # test array of array channels input
+    stim_events = nk.events_find(
+        np.array([channel1, channel2, channel3])
+    )
+
+    assert list(stim_events["onset"]) == [10, 60, 150]
+    assert list(stim_events["duration"]) == [20, 20, 20]
+    assert stim_events["condition"] == [1, 3, 7]
+
+    # test for pandas dataframe
+    stim_events = nk.events_find(
+        pd.DataFrame({"c1": channel1, "c2": channel2, "c3": channel3})
+    )
+
+    assert list(stim_events["onset"]) == [10, 60, 150]
+    assert list(stim_events["duration"]) == [20, 20, 20]
+    assert stim_events["condition"] == [1, 3, 7]
+
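The condition values asserted in test_stim_events_find ([1, 3, 7]) are consistent with a binary weighting of the combined digital inputs: the first channel contributes 1, the second 2, and the third 4, so an event present on channels 1 and 2 yields condition 3, and one present on all three yields 7. A small sketch of that arithmetic (an assumption drawn from the asserted values, not the events_find() internals):

    # Event at samples 60-79: high on channels 1 and 2, low on channel 3.
    active = [True, True, False]
    condition = sum(2 ** i for i, on in enumerate(active) if on)
    assert condition == 3  # 0b011

    # Event at samples 150-169: high on all three channels.
    assert sum(2 ** i for i in range(3)) == 7  # 0b111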