-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_db_peaks.py
175 lines (139 loc) · 5.2 KB
/
prepare_db_peaks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#%%
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from scipy.interpolate import interp1d
import numpy as np
# %%
def extract_from_df(dataframe, list_of_feature):
    """
    Build a new dataframe made only of the requested columns

    Parameters
    ----
    dataframe: DataFrame; the table the columns are read from
    list_of_feature: iterable of column labels; the columns to copy, in the
    order they must appear in the result

    Return
    ----
    selected: DataFrame; a new dataframe whose columns are the requested
    ones, in the requested order
    """
    selected = pd.DataFrame()
    position = 0
    # columns are appended one at a time, so a repeated label makes
    # DataFrame.insert raise instead of silently duplicating the column
    for feature in list_of_feature:
        selected.insert(position, feature, dataframe[feature])
        position += 1
    return selected
def save_pickle(filename, dataframe):
    """
    Serialize an object to disk with pickle

    Parameters
    ----
    filename: string; path of the file to write (created or overwritten)
    dataframe: object; the object to serialize, the script passes a DataFrame
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(dataframe, sink)
def m_avg(t, x, w):
    """
    Centered moving average of x over a window of 2*w + 1 samples

    Parameters
    ----
    t: indexable 1-D sequence; the x axis associated to x
    x: numerical 1-D array-like; the values to average
    w: int; half width of the window, in samples

    Return
    ----
    (trimmed_t, smoothed): tuple of two arrays; the elements of t from
    position w to position len(x) - w - 1, and the 'valid' part of the
    convolution of x with a uniform kernel of 2*w + 1 taps
    """
    n_taps = 2*w + 1
    # w samples are dropped on each side, where the window would not fit
    trimmed_t = np.asarray([t[k] for k in range(w, len(x) - w)])
    kernel = np.ones((n_taps, )) / n_taps
    smoothed = np.convolve(x, kernel, mode='valid')
    return trimmed_t, smoothed
def detect_peaks(signal, mov_avg):
    """
    Find the maximum of every excursion of signal above a moving threshold

    The two inputs are scanned in lockstep (the scan stops at the shorter
    one). Samples above the threshold are collected in a window; when the
    signal comes back to or below the threshold and the window holds more
    than one sample, the position of the largest collected sample is stored
    and the window is emptied. A window holding a single sample is not
    closed: that sample is kept and competes with the samples of the next
    excursion. A window still open when the scan ends is not reported.

    Parameters
    ----
    signal: numerical 1-D sequence; the curve whose peaks are searched
    mov_avg: numerical 1-D sequence; the threshold, one value per sample

    Return
    ----
    (peaklist, values): tuple of two lists; the index positions of the peaks
    in signal and the corresponding signal values
    """
    # The true index of every collected sample is stored next to its value:
    # after an isolated single-sample excursion the window is no longer a
    # contiguous slice of signal, so the peak position cannot be rebuilt
    # from the window length alone.
    window_positions = []
    window_values = []
    peaklist = []
    for (i, datapoint), roll in zip(enumerate(signal), mov_avg):
        if (datapoint <= roll) and (len(window_values) <= 1):
            continue
        elif (datapoint > roll):
            window_positions.append(i)
            window_values.append(datapoint)
        else:
            peaklist.append(window_positions[np.argmax(window_values)])
            window_positions = []
            window_values = []
    return peaklist, [signal[x] for x in peaklist]
# def db_to_dataframe(filename):
# """
# Load the json file obtained through create_db into a pandas dataframe
# Parameters
# ----
# filename: string; the full path to the json file, complete with extension
# Return
# ----
# d: DataFrame; the dataframe containing the elements stored in the file
# """
# d = json.load(open(filename))
# d = pd.DataFrame(d).T
# d = d.set_index(np.arange(len(d)))
# return d
def find_x_of_minima(time, signal):
    """
    find index position of local minima whose amplitude is under a certain
    moving threshold

    The signal is negated and its peaks are searched with detect_peaks
    against five moving averages of different width; only the positions found
    with every width are kept. Positions on the first or last sample are
    discarded, then positions closer than half a second (srate*.5 samples)
    to each other are merged into a single one.

    Parameters
    ----
    time: numerical 1-D array-like; basically the x axis of the curve whose
    minima will be found
    signal: numerical 1-D array-like; basically the y axis of the curve whose
    minima will be found

    Return
    ----
    final_peaks: 1-D integer array; the sorted index positions of signal
    minima, empty when no position survives the selection
    """
    # -1* is used to find minima instead of maxima
    sign = -1*np.asarray(signal)
    # using time to extrapolate sampling rate
    srate = len(time)/(max(time)-min(time))
    baseline = np.mean(sign)  # value used to pad the moving averages
    peaks = np.arange(len(sign))  # initializing peaks index
    # different widths used for moving window, the unit is referred to 1 srate
    for i in (.5, 1., 1.5, 2., 3.):
        _, mov_avg = m_avg(time, sign, int(srate*i))
        # use len_filler to make mov_avg the same size as sign
        len_filler = np.zeros((len(sign)-len(mov_avg))//2) + baseline
        mov_avg = np.insert(mov_avg, 0, len_filler)
        mov_avg = np.append(mov_avg, len_filler)
        peaklist, _ = detect_peaks(sign, mov_avg)
        # keeping only peaks detected with all 5 different windows; the cast
        # keeps an integer dtype even when peaklist is empty
        peaks = np.intersect1d(peaks, np.asarray(peaklist, dtype=np.intp))
    # first element can't be a correct local extrema, it has no points before
    if peaks.size and peaks[0] == 0:
        peaks = peaks[1:]
    # last element can't be a correct local extrema, it has no points after
    if peaks.size and peaks[-1] == len(sign)-1:
        peaks = peaks[:-1]
    # peak checking: one position is kept for every group of positions whose
    # distance from the first of the group is at most half a second
    half_second = srate*.5
    final_peaks = []  # definitive peak positions container
    last_peak = -1  # parameter to avoid undesired peaks still in final_peaks
    for p in peaks:
        if p <= last_peak:
            continue
        evaluated_peaks = peaks[(peaks >= p) & (peaks <= half_second+p)]
        last_peak = evaluated_peaks.max()
        # NOTE(review): sign is the negated signal, so argmin keeps the
        # position with the highest value of the original signal, i.e. the
        # shallowest minimum of the group - confirm this is intended
        final_peaks.append(evaluated_peaks[np.argmin(sign[evaluated_peaks])])
    # to avoid repetitions; the explicit dtype keeps an empty result integer
    return np.unique(np.asarray(final_peaks, dtype=np.intp))
# %%
# Input database, columns to keep and destination of the processed database
filepath_input = 'db_after_notch.json'
filepath_output = '/Users/lorenzo/Desktop/PPG/db_ppg_5_peaks.pickle'
list_of_feature = ('signal', 'time', 'age', 'quality', 'bpm')
# %%
# The json file is read with one record per key (orient='index'), then only
# the columns listed above are kept
raw_df = pd.read_json(filepath_input, orient='index')
df = extract_from_df(raw_df, list_of_feature)
# %%
signal_patient = []
quality_threshold = 0.005
# idx_not_valid_signal = 3069
#%%
# Keep the records whose quality lies in [0, quality_threshold) and whose age
# is different from 0, then renumber the rows from 0
df = df[(df['quality'] >= 0) & (df['quality'] < quality_threshold)]
df = df[df['age'] != 0].reset_index(drop=True)
#df = df.drop(columns=['quality'])
N_PEOPLE = len(df['signal'])
#%%
# Cut every recording at every fifth detected minimum (starting from the
# sixth), drop the first and the last piece, and resample each remaining
# piece on 512 evenly spaced time points with a cubic interpolation.
final_peaks = []  # resampled pieces of all the recordings
final_labels = []  # one labels row (record without signal/time) per piece
separate_guys = []  # position in final_peaks where each recording starts
skipped_guys = []  # recordings discarded because the interpolation failed
cont = 0
for guy in range(N_PEOPLE):  # test_idx # range(len(d)):
    #print(cont/len(d)) # same
    cont += 1
    separate_guys.append(len(final_peaks))
    sample = df.iloc[guy]  # d.loc[guy] # d.iloc[guy] # it does not work with loc
    splits = find_x_of_minima(sample.time, sample.signal)
    cut_points = splits[5::5]
    peaks = np.split(sample.signal, cut_points)[1:-1]
    times = np.split(sample.time, cut_points)[1:-1]
    labels = sample.drop(['signal', 'time'])
    try:
        new_peaks = [
            interp1d(tx, sy, kind="cubic")(np.linspace(min(tx), max(tx), 512))
            for tx, sy in zip(times, peaks)
        ]
    except ValueError as err:
        # best effort: when one piece cannot be interpolated the whole
        # recording is left out, but the event is recorded instead of being
        # silently ignored
        skipped_guys.append(guy)
        print("skipped recording", guy, "-", err)
    else:
        # extend in place: rebuilding the lists with + at every iteration
        # copies everything collected so far
        final_peaks.extend(new_peaks)
        final_labels.extend([labels]*len(new_peaks))
if skipped_guys:
    print(len(skipped_guys), "recordings skipped out of", N_PEOPLE)
print("ended")
#%%
# One row per resampled piece: the labels of the recording it comes from plus
# the piece itself in the 'signal' column, with rows renumbered from 0
new_df = (
    pd.DataFrame(final_labels)
    .assign(signal=final_peaks)
    .reset_index(drop=True)
)
print(new_df)
# %%
# Store the resulting dataframe as a pickle file
save_pickle(filepath_output, new_df)
# %%