-
Notifications
You must be signed in to change notification settings - Fork 1
/
01_artefact_detection.py
293 lines (244 loc) · 10.2 KB
/
01_artefact_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
"""
=============================================================
Extract segments of the data recorded during task performance
=============================================================
Segments that were recorded during the self-paced breaks (in between
experimental blocks) will be dropped.
Authors: José C. García Alanis <[email protected]>
License: BSD (3-clause)
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mne import Annotations, open_report
from mne.io import read_raw_fif
# All parameters are defined in config.py
from config import fname, parser, n_jobs, LoggingFormat
from bads import find_bad_channels
from viz import plot_z_scores
# Handle command line arguments (subject to process)
args = parser.parse_args()
subject = args.subject

# announce which subject is being processed
print(f'{LoggingFormat.PURPLE}{LoggingFormat.BOLD}'
      f'Initialise bad channel detection for subject {subject}'
      f'{LoggingFormat.END}')
###############################################################################
# 1) Import the output from previous processing step
# path of the raw file produced by the previous pipeline step
input_file = fname.output(processing_step='raw_files',
                          subject=subject,
                          file_type='raw.fif')
# load with data in memory so filtering below can operate on it
raw = read_raw_fif(input_file, preload=True)
# the status (trigger) channel is not needed for artefact detection
raw.drop_channels('Status')
###############################################################################
# 2) Remove slow drifts and line noise
# Band-pass 0.1 - 40 Hz, one-pass, zero-phase, non-causal FIR (firwin),
# Hamming window (0.0194 passband ripple, 53 dB stopband attenuation):
# - lower passband edge 0.10 Hz (trans. bandwidth 0.10 Hz, -6 dB at 0.05 Hz)
# - upper passband edge 40.00 Hz (trans. bandwidth 10.00 Hz, -6 dB at 45.00 Hz)
# - filter length 8449 samples (33.004 sec)
fir_settings = dict(filter_length='auto',
                    l_trans_bandwidth='auto',
                    h_trans_bandwidth='auto',
                    method='fir',
                    phase='zero',
                    fir_window='hamming',
                    fir_design='firwin')
raw = raw.filter(l_freq=0.1, h_freq=40.,
                 picks=['eeg', 'eog'],
                 n_jobs=n_jobs,
                 **fir_settings)

# render a quick look at the filtered data (figure is closed right away and
# not referenced again)
filt_plot = raw.plot(scalings=dict(eeg=50e-6, eog=50e-6),
                     n_channels=len(raw.info['ch_names']),
                     show=False)
plt.close('all')
###############################################################################
# 3) Check if there are any flat EOG channels
# EOG channels carrying no signal are useless for later ocular correction
eog_flags = find_bad_channels(raw, picks='eog', method='flat')
flat_eogs = eog_flags['flat']
raw.drop_channels(flat_eogs)

###############################################################################
# 4) Plot power spectral density
# rendered for visual inspection; figure is closed immediately
fig, ax = plt.subplots(figsize=(10, 5))
raw.plot_psd(fmax=70, show=False, ax=ax)
plt.close('all')
###############################################################################
# 5) Find noisy channels and compute robust average reference
sfreq = raw.info['sfreq']
channels = raw.copy().pick_types(eeg=True).ch_names
# extract eeg signal
eeg_signal = raw.get_data(picks='eeg')
# initial reference: robust (channel-wise median) estimate of central tendency
ref_signal = np.nanmedian(eeg_signal, axis=0)

# Iteratively search for noisy channels: interpolate everything found so far,
# recompute the reference from the cleaned signal and search again, until a
# pass finds no new bad channels (at most 6 passes).
# ``noisy`` is a set so repeated detections of the same channel across passes
# cannot produce duplicate entries in info['bads'] (the original list-based
# accumulator could).
noisy = set()
for i in range(6):
    # remove current reference estimate
    eeg_temp = eeg_signal - ref_signal
    # find bad channels by deviation (high variability in amplitude)
    bad_dev = find_bad_channels(eeg_temp,
                                channels=channels,
                                method='deviation')['deviation']
    # find channels that don't correlate well with the other channels
    bad_corr = find_bad_channels(eeg_temp,
                                 channels=channels,
                                 sfreq=sfreq,
                                 r_threshold=0.45,
                                 percent_threshold=0.05,
                                 time_step=1.0,
                                 method='correlation')['correlation']
    # unique channels flagged by either criterion in this pass
    bads = set(bad_dev) | set(bad_corr)
    if bads:
        noisy |= bads
        # sorted for a deterministic, reproducible log message
        print('Found bad channels %s'
              % (', '.join(str(chan) for chan in sorted(bads))))
    # interpolate all noisy channels found so far on a fresh copy of the
    # (unmodified) raw data; sorted for reproducible ordering in info['bads']
    raw_copy = raw.copy()
    raw_copy.info['bads'] = sorted(noisy)
    raw_copy.interpolate_bads(mode='accurate')
    eeg_signal = raw_copy.get_data(picks='eeg')
    # compute new reference (mean of signal with interpolated channels)
    ref_signal = np.nanmean(eeg_signal, axis=0)
    # break if no (more) bad channels found (never on the very first pass)
    if (i > 0 and len(bads) == 0) or i > 4:
        print('Finishing after i == %s' % i)
        break
###############################################################################
# 6) Compute robust average reference for EEG data
# remove the robust reference estimated above
eeg_signal = raw.get_data(picks='eeg')
eeg_temp = eeg_signal - ref_signal

# bad by (un)correlation with the other channels
bad_corr = find_bad_channels(eeg_temp,
                             channels=channels,
                             sfreq=sfreq,
                             r_threshold=0.45,
                             percent_threshold=0.05,
                             time_step=1.0,
                             method='correlation')['correlation']
# bad by deviation (keep the z-scores for the diagnostic plot);
# use a separate name for the result dict instead of shadowing ``bad_dev``
dev_results = find_bad_channels(eeg_temp,
                                channels=channels,
                                method='deviation',
                                return_z_scores=True)
z_scores = dev_results['deviation_z_scores']
bad_dev = dev_results['deviation']

# only keep unique values (flagged by either criterion)
bad_channels = set(bad_dev) | set(bad_corr)

# create plot showing channels' z-scores (saved to the report later)
fig = plot_z_scores(z_scores, channels=channels, bads=bad_channels, show=False)
plt.close('all')

# interpolate the identified channels; sorted() gives a deterministic
# ordering in raw.info['bads'] (set iteration order is not reproducible)
raw.info['bads'] = sorted(bad_channels)
raw.interpolate_bads(mode='accurate')

###############################################################################
# 7) Reference eeg data to average of all eeg channels
raw.set_eeg_reference(ref_channels='average', projection=True)
###############################################################################
# 4) Find distorted segments in data
# channels to use in artefact detection procedure
eeg_channels = raw.copy().pick_types(eeg=True).ch_names
# ignore fronto-polar channels (dominated by ocular activity)
frontal_channels = {'Fp1', 'Fpz', 'Fp2', 'AF7', 'AF3', 'AFz', 'AF4', 'AF8'}
# NOTE(fix): indices are taken relative to ``eeg_channels`` — the row order
# of the data array extracted below — not relative to ``raw.ch_names``,
# which may also contain non-EEG channels and only matched by accident.
picks = [index for index, channel in enumerate(eeg_channels)
         if channel not in frontal_channels]

# use a copy of eeg data with the average reference projection applied
raw_copy = raw.copy()
raw_copy.apply_proj()
data = raw_copy.get_data(eeg_channels)


def _find_artifact_onsets(data, picks, ch_names, sfreq, threshold=250e-6):
    """Find samples whose absolute amplitude exceeds a threshold.

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_samples)
        The signal in volts; rows must follow the order of ``ch_names``.
    picks : list of int
        Row indices in ``data`` to scan.
    ch_names : list of str
        Channel names matching the rows of ``data``.
    sfreq : float
        Sampling frequency; detections within one second after a previous
        detection are suppressed.
    threshold : float
        Absolute amplitude threshold in volts (default 250 microvolts).

    Returns
    -------
    times : list of float
        Sample indices at which an artefact was detected.
    channels : list of str
        Name of the channel with the largest amplitude at each detection.
    """
    times = []
    annotated_channels = []
    for sample in range(data.shape[1]):
        # skip samples within one second of the previous detection
        if times and sample <= (times[-1] + int(1 * sfreq)):
            continue
        # peak absolute amplitude across the selected channels (vectorized
        # replacement for the original per-channel Python loop)
        peak = np.abs(data[picks, sample])
        if peak.max() >= threshold:
            times.append(float(sample))
            annotated_channels.append(ch_names[picks[int(np.argmax(peak))]])
    return times, annotated_channels


# detect artefacts (i.e., absolute amplitude >= 250 microV; the previous
# comment claiming 500 microV did not match the code)
times, annotated_channels = _find_artifact_onsets(
    data, picks, eeg_channels, sfreq)
# defaults used below when no artefacts were found
onsets = []
duration = []
# if artifacts were found, create annotations for the raw data
if len(times) > 0:
    # offset of the first sample of the recording
    first_time = raw_copy.first_time
    # column names
    annot_infos = ['onset', 'duration', 'description']
    # artefact onsets in seconds, including one second before artifact onset
    onsets = ((np.asarray(times) / sfreq) + first_time) - 1
    # durations and labels
    duration = np.repeat(2, len(onsets))
    description = np.repeat('Bad', len(onsets))
    # build the data frame from a dict so onsets/durations keep their numeric
    # dtype (the original np.array((onsets, duration, description)).T coerced
    # every column to strings)
    artifacts = pd.DataFrame({'onset': onsets,
                              'duration': duration,
                              'description': description})
    # annotations already present in the data
    annotations = pd.DataFrame(raw_copy.annotations)
    annotations = annotations[annot_infos]
    # merge artifacts and previous annotations
    # (DataFrame.append was removed in pandas 2.0; use pd.concat instead)
    artifacts = pd.concat([artifacts, annotations], ignore_index=True)
    # create new annotation info; cast explicitly so mne.Annotations receives
    # floats regardless of what dtypes survived the merge
    annotations = Annotations(artifacts['onset'].astype(float),
                              artifacts['duration'].astype(float),
                              artifacts['description'],
                              orig_time=raw_copy.annotations.orig_time)
    # apply to raw data
    raw.set_annotations(annotations)
    # total annotated time in seconds (2 s per detected artefact)
    total_time = sum(duration)
    # seconds annotated per channel
    frequency_of_annotation = {x: annotated_channels.count(x) * 2
                               for x in annotated_channels}

# create plot with clean data (saved to the report later)
plot_artefacts = raw.plot(scalings=dict(eeg=50e-6, eog=50e-6),
                          n_channels=len(raw.info['ch_names']),
                          title='Robust reference applied Sub-%s' % subject,
                          show=False)
plt.close('all')
###############################################################################
# 8) Export data to .fif for further processing
# destination of this processing step's output
output_path = fname.output(subject=subject,
                           processing_step='repair_bads',
                           file_type='raw.fif')
# write the repaired raw data, replacing any previous run's output
raw.save(output_path, overwrite=True)
###############################################################################
# 6) Create HTML report
# summarise the channels that were interpolated
interpolated = ', '.join(str(chan) for chan in bad_channels)
bad_channels_identified = '<p>Channels_interpolated:<br>' \
                          '%s <p>' % interpolated
with open_report(fname.report(subject=subject)[0]) as report:
    # list of interpolated channels
    report.add_htmls_to_section(htmls=bad_channels_identified,
                                captions='Bad channels',
                                section='Bad channel detection')
    # z-score diagnostic figure
    report.add_figs_to_section(fig, 'Robust Z-Scores',
                               section='Bad channel detection',
                               replace=True)
    # overview of the cleaned data
    report.add_figs_to_section(plot_artefacts, 'Clean data',
                               section='Bad channel detection',
                               replace=True)
    report.save(fname.report(subject=subject)[1], overwrite=True,
                open_browser=False)