-
Notifications
You must be signed in to change notification settings - Fork 1
/
lynne_pp.py
318 lines (263 loc) · 10.8 KB
/
lynne_pp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
import os
import sys
# Directory that contains this file. os.path.dirname is used instead of
# splitting on '/' so the result is also correct where the path separator
# is not '/'.
dir_path = os.path.dirname(os.path.realpath(__file__))

# Candidate locations of the backend modules imported below (sglm, sglm_cv,
# sglm_pp, ...), appended in the same order as before.
for _rel in ('sabatinilab-glm/backend', '..', 'backend', '../backend'):
    sys.path.append(f'{dir_path}/{_rel}')
import time
import numpy as np
import sglm
import sglm_cv
import sglm_pp
import sglm_ez
import sglm_plt as splt
import sglm_save as ssave
import pandas as pd
def define_trial_starts_ends(df, trial_shift_bounds=7):
    '''
    Define trial starts and ends.

    A trial start is flagged at a center-port entry (`cpn`) whose next event
    is not another center-port entry; the flag is then moved
    `trial_shift_bounds` rows earlier. A trial end is flagged at the first
    side-port exit (`lpx` / `rpx`) that follows a trial start; the flag is
    then moved `trial_shift_bounds` rows later. `nTrial` and `nEndTrial` are
    the running counts of those flags.

    The input dataframe is not modified; all work happens on a copy.

    Args:
        df: dataframe on which to define trial starts and ends
            (needs `cpn`, `lpx` and `rpx` columns)
        trial_shift_bounds: define how many timesteps before / after first / last event to include as non-ITI
    Returns:
        dataframe with added nTrial and nEndTrial columns to identify the number of the trial counts for start & end
        (the helper columns `event_col`, `trial_start_flag`, `event_col_end`
        and `trial_end_flag` are kept as well). Because of the shifts, the
        last `trial_shift_bounds` rows of `nTrial` and the first
        `trial_shift_bounds` rows of `nEndTrial` are NaN.
    '''
    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()

    # Encode events: 1.0 = center-port entry, 2.0 = side-port exit, NaN = none.
    center_entry = df['cpn'].replace(0, np.nan) * 1.0
    left_exit = df['lpx'].replace(0, np.nan) * 2.0
    right_exit = df['rpx'].replace(0, np.nan) * 2.0
    side_exit = left_exit.combine_first(right_exit)

    # Back-fill so that every row carries the code of the next event at or
    # after it (center entry wins when both occur on the same row).
    df['event_col'] = center_entry.combine_first(side_exit).bfill()

    # A row starts a trial when it is the last row still "waiting" for a
    # center entry, i.e. the center entry itself, followed by something else.
    starts_here = (df['event_col'] == 1.0) & (df['event_col'].shift(-1) != 1.0)
    # Cast to float BEFORE shifting: shifting a boolean Series introduces NaN
    # and would turn the column (and everything derived from it) into object
    # dtype.
    df['trial_start_flag'] = starts_here.astype(float).shift(-trial_shift_bounds)
    df['nTrial'] = df['trial_start_flag'].cumsum()

    # Forward-fill so that every row carries the code of the most recent
    # event: 2.0 after a side exit, 1.0 after a (shifted) trial start.
    shifted_starts = df['trial_start_flag'].replace(0.0, np.nan)
    df['event_col_end'] = side_exit.combine_first(shifted_starts).ffill()

    # A row ends a trial when the most recent event has just become a side
    # exit and at least one trial has already started.
    ends_here = (
        (df['event_col_end'] == 2.0)
        & (df['event_col_end'].shift(1) != 2.0)
        & (df['nTrial'] > 0)
    )
    df['trial_end_flag'] = ends_here.astype(float).shift(trial_shift_bounds)
    df['nEndTrial'] = df['trial_end_flag'].cumsum()

    # The per-event helper columns are no longer written to the frame; drop
    # them only if the caller's input happened to contain them already.
    return df.drop(columns=['event_col_a', 'event_col_b', 'event_col_c'], errors='ignore')
def rename_columns(df):
    '''
    Simplify variable names to match the GLM

    Columns that are not listed in the mapping keep their name.

    Args:
        df: dataframe with entry, exit, lick, reward, and dFF columns
    Returns:
        new dataframe with renamed columns (the input is left untouched)
    '''
    # NOTE: an earlier, now disabled, mapping translated verbose headers such
    # as 'center port entry' or 'zscored green (Ach3.0)' to cpn / gdFF /
    # zsgdFF / zsrdFF. Only the compact header names below are handled now.
    short_names = {
        # photometry channels keep their names
        'Ch1': 'Ch1',
        'Ch2': 'Ch2',
        'Ch5': 'Ch5',
        'Ch6': 'Ch6',
        # center port: occupancy / entry / exit
        'centerOcc': 'cpo',
        'centerIn': 'cpn',
        'centerOut': 'cpx',
        # right port: occupancy / entry / exit / licks
        'rightOcc': 'rpo',
        'rightIn': 'rpn',
        'rightOut': 'rpx',
        'rightLick': 'rl',
        # left port: occupancy / entry / exit / licks
        'leftOcc': 'lpo',
        'leftIn': 'lpn',
        'leftOut': 'lpx',
        'leftLick': 'll',
        # outcome
        'reward': 'r',
        'noreward': 'nr',
    }
    return df.rename(columns=short_names)
def set_reward_flags(df):
    '''
    Set reward flags

    Every row of a trial gets `r_trial` = 1.0 when the `r` column sums to
    more than zero within that trial, else 0.0. `nr_trial` is the
    complementary flag and is also derived from `r` (the `nr` column is not
    read).

    The columns are added to `df` in place and the same object is returned.

    Args:
        df: dataframe with nTrial and r columns
    Returns:
        dataframe with added rewarded trial and not rewarded trial columns
    '''
    # Identify rewarded vs. unrewarded trials.
    # The string alias 'sum' is used instead of np.sum: pandas deprecates
    # passing numpy reducers to groupby transform. The sum is computed once
    # and shared by both flags.
    reward_per_trial = df.groupby('nTrial')['r'].transform('sum')
    df['r_trial'] = (reward_per_trial > 0) * 1.0
    df['nr_trial'] = (reward_per_trial <= 0) * 1.0
    return df
def set_port_entry_exit_rewarded_unrewarded_indicators(df):
    '''
    Set port entry, exit, and intersecting reward / non-reward indicators

    For each side port (right `r`, left `l`), each event (exit `px`,
    entry `pn`) and each outcome column (`r`, `nr`), a column named
    <side><event><outcome> is added holding the row-wise product of the
    outcome column and the event column, e.g. rpxnr = nr * rpx.

    Args:
        df: dataframe with right / left port entry / exit columns and reward/no_reward indicators
    Returns:
        new dataframe with right / left, rewarded / unrewarded intersection indicators
    '''
    # Identify combined reward vs. non-rewarded / left vs. right / entries vs. exits.
    # The row-wise r / nr columns are used here, not the per-trial
    # r_trial / nr_trial flags.
    intersections = {}
    for event in ('px', 'pn'):
        for side in ('r', 'l'):
            for outcome in ('r', 'nr'):
                intersections[f'{side}{event}{outcome}'] = df[outcome] * df[f'{side}{event}']
    return df.assign(**intersections)
def define_side_agnostic_events(df):
    '''
    Define side agnostic events

    Each new column s<suffix> is the row-wise sum of the right-side column
    r<suffix> and the left-side column l<suffix>
    (e.g. spn = rpn + lpn, sl = rl + ll).

    Args:
        df: dataframe with left / right entry / exit and rewarded / unrewarded indicators
    Returns:
        new dataframe with added port entry/exit, and reward indicators
    '''
    # Order matters: it fixes the order of the appended columns.
    suffixes = ('pn', 'px', 'pnr', 'pnnr', 'pxr', 'pxnr', 'l')
    side_agnostic = {
        f's{suffix}': df[f'r{suffix}'] + df[f'l{suffix}']
        for suffix in suffixes
    }
    return df.assign(**side_agnostic)
def get_first_time_events(dfrel):
    '''
    Add first-time-in-trial event indicators (ft_* columns).

    For each tracked event column, the running per-trial count is taken and
    the row on which that count first becomes 1 is flagged. Side-specific
    columns (lpn / rpn / spn and lpx / rpx / spx) are then defined as
    "the first side-port entry (resp. exit) of the trial, and it was on this
    side", so at most one of ft_lpn / ft_rpn is set per trial.

    The columns are added to `dfrel` in place and the same object is returned.

    Args:
        dfrel: dataframe with nTrial, entry, exit, reward, non-reward columns
               (lpn, rpn, spn, lpx, rpx, spx, cpn, r, nr)
    Returns:
        dfrel with added columns nn, xx, ft_nn, ft_xx, ft_lpn, ft_rpn,
        ft_spn, ft_lpx, ft_rpx, ft_spx, ft_cpn and the reward-split
        ft_r_* / ft_nr_* entry columns. Row 0 of the ft_* columns is NaN
        because the flags are built from a row-to-row difference.
    '''
    # Side-agnostic entry / exit counts per row.
    dfrel['nn'] = dfrel[['lpn', 'rpn']].sum(axis=1)
    dfrel['xx'] = dfrel[['lpx', 'rpx']].sum(axis=1)

    tracked = ['nn', 'xx', 'lpn', 'rpn', 'spn', 'lpx', 'rpx', 'spx', 'cpn']
    running_count = dfrel.groupby('nTrial')[tracked].cumsum()

    # 1 on the row where the running count first equals 1; the negative step
    # where it stops equalling 1 is clipped away (NaN in row 0 is kept).
    first_trans = ((running_count == 1) * 1).diff().clip(lower=0)

    # FIX: gate the side-specific events with the first-transition flags
    # (first_trans['nn'] / first_trans['xx']). The previous code multiplied
    # by the raw dfrel['nn'] / dfrel['xx'] counts, which discarded the
    # first-transition computation and flagged every entry / exit.
    for entry_col in ('lpn', 'rpn', 'spn'):
        first_trans[entry_col] = first_trans['nn'] * dfrel[entry_col]
    for exit_col in ('lpx', 'rpx', 'spx'):
        first_trans[exit_col] = first_trans['xx'] * dfrel[exit_col]

    for col in tracked:
        dfrel[f'ft_{col}'] = first_trans[col]

    # Split first entries by the row-wise outcome columns.
    for outcome in ('r', 'nr'):
        for entry_col in ('rpn', 'lpn', 'spn'):
            dfrel[f'ft_{outcome}_{entry_col}'] = dfrel[f'ft_{entry_col}'] * dfrel[outcome]
    return dfrel
def preprocess_lynne(df, trial_shift_bounds=7):
    '''
    Preprocess Lynne's dataframe for GLM

    Pipeline: drop 'Unnamed' columns, shorten column names, number trial
    starts / ends, add per-trial reward flags, add reward x port-event
    indicators, add side-agnostic events, cast everything to float and add
    the first-time (ft_*) event columns.

    Args:
        df: dataframe with entry, exit, lick, reward, and dFF columns
        trial_shift_bounds: forwarded to define_trial_starts_ends
    Returns:
        float dataframe with entry, exit, lick, reward, trial-number and
        derived indicator columns
    '''
    def _without_unnamed(frame):
        # Discard columns whose label contains 'Unnamed'.
        return frame[[_ for _ in frame.columns if 'Unnamed' not in _]]

    df = _without_unnamed(df)
    df = rename_columns(df)
    df = define_trial_starts_ends(df, trial_shift_bounds=trial_shift_bounds)

    # Rows where both counters agree are treated as inter-trial interval.
    # The printed value is a fraction in [0, 1] despite the label.
    iti_fraction = (df['nTrial'] == df['nEndTrial']).mean()
    print('Percent of Data in ITI:', iti_fraction)

    df = set_reward_flags(df)
    df = set_port_entry_exit_rewarded_unrewarded_indicators(df)
    df = define_side_agnostic_events(df)

    if 'index' in df.columns:
        df = df.drop('index', axis=1)

    # Numeric copy: the string 'False' becomes 0, then everything is float.
    dfrel = df.copy().replace('False', 0).astype(float) * 1
    dfrel = _without_unnamed(dfrel)
    return get_first_time_events(dfrel)
def detrend(df, y_col):
    '''
    Replace a column with its detrended version and drop incomplete rows.

    `df[y_col]` is overwritten in place with the result of
    sglm_pp.detrend_data(df, y_col, [], 200).
    NOTE(review): the meaning of the `[]` and `200` arguments is defined by
    sglm_pp.detrend_data and is not visible here - confirm before changing.

    Args:
        df: dataframe containing y_col
        y_col: name of the column to detrend
    Returns:
        new dataframe without any row that contains a NaN (in any column)
    '''
    df[y_col] = sglm_pp.detrend_data(df, y_col, [], 200)
    return df.dropna()
def get_is_not_iti(df):
    '''
    Returns a boolean Series marking the rows that are not ITI

    A row counts as not-ITI when its trial-start counter (nTrial) differs
    from its trial-end counter (nEndTrial). Rows where either counter is NaN
    also compare as different.

    Args:
        df: dataframe with nTrial and nEndTrial columns
    Returns:
        boolean Series (one value per row) of whether the row is not ITI
    '''
    started = df['nTrial']
    ended = df['nEndTrial']
    return started.ne(ended)
def timeshift_vals(dfrel, X_cols, neg_order=-7, pos_order=20):
    '''
    Timeshift values

    Only X_cols[1:] is time-shifted; the first entry of X_cols is passed
    through to the output column list but is not shifted itself.
    NOTE(review): presumably the first entry is an identifier such as the
    trial number - confirm against the callers.

    Args:
        dfrel: full dataframe
        X_cols: list of columns; every entry except the first is shifted
        neg_order: negative order of the timeshift
        pos_order: positive order of the timeshift
    Returns:
        dfrel: dataframe with additional timeshifted columns
        X_cols_sftd: list of shifted columns
    '''
    shift_window = {'neg_order': neg_order, 'pos_order': pos_order}
    # A fresh slice is taken for each call, exactly as before, so the two
    # helpers never share one list object.
    shifted_frame = sglm_ez.timeshift_cols(dfrel, X_cols[1:], **shift_window)
    X_cols_sftd = sglm_ez.add_timeshifts_to_col_list(X_cols, X_cols[1:], **shift_window)
    return shifted_frame, X_cols_sftd
def get_first_entry_time(tmp):
    '''
    Get first entry time

    Adds, in place, a per-trial row counter and two time axes measured
    relative to the first flagged side-port entry and the first flagged
    center-port entry of each trial.

    Args:
        tmp: dataframe with ITI removed, and first_time (ft_rpn / ft_lpn / ft_cpn) columns defined
    Returns:
        the same dataframe with added columns:
          '1'                 constant helper column of ones
          'tim'               1-based row counter within each trial
          'adjusted_time'     tim minus the side-port entry offset
          'cpn_adjusted_time' tim minus the center-port entry offset
    '''
    def _flag_offset(flag_col):
        # Zero-based position, within each trial, of the first maximum of
        # flag_col (0 when the column is constant within the trial).
        per_trial = tmp.groupby('nTrial')[flag_col]
        return per_trial.transform(lambda x: x.argmax()).astype(int)

    # Get first entry time
    tmp['1'] = 1
    tmp['tim'] = tmp.groupby('nTrial')['1'].cumsum()

    right_offset = _flag_offset('ft_rpn')
    left_offset = _flag_offset('ft_lpn')
    # Take the larger of the two offsets; when they are equal the result is 0.
    side_offset = (
        (right_offset > left_offset) * right_offset
        + (right_offset < left_offset) * left_offset
    )
    # NOTE(review): 'tim' is 1-based while the offsets are 0-based, so the
    # entry row itself gets adjusted_time == 1 - confirm this is intended.
    tmp['adjusted_time'] = tmp['tim'] - side_offset

    tmp['cpn_adjusted_time'] = tmp['tim'] - _flag_offset('ft_cpn')
    return tmp
if __name__ == '__main__':
    # Manual smoke test: load one session file, shorten its column names and
    # print the trial start / end numbering.
    #
    # Usage: python lynne_pp.py [path/to/signals.csv]
    # The path used to be hard-coded to one user's machine; it is now an
    # optional command-line argument, with the old location as the default.
    default_path = '/Users/josh/Documents/Harvard/GLM/GLM_SIGNALS_WT68_12152021.txt'
    csv_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    df = pd.read_csv(csv_path)
    # Drop unlabeled columns (pandas labels them 'Unnamed: N').
    df = df[[_ for _ in df.columns if 'Unnamed' not in _]]
    print(df.columns)
    df = rename_columns(df)
    print(df.columns)
    df = define_trial_starts_ends(df, trial_shift_bounds=1)
    print(df)