-
Notifications
You must be signed in to change notification settings - Fork 0
/
ScoreFMRIBehavior.py
413 lines (373 loc) · 17.4 KB
/
ScoreFMRIBehavior.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
"""
Use this scoring program in other programs as:
import ScoreNeuroPsych
ScoreNeuroPsych.ScoreAll()
This could be added to the end of the GUI program, so that when the GUI is closed
the data is scored and updated.
"""
import os
import sys
import fnmatch
import shutil
import pandas as pd
import numpy as np
import glob
import datetime
import collections
# What folder is this file in?
dir_path = os.path.dirname(os.path.realpath(__file__))
# Location of the cognitive-tasks repository that holds the config files.
# NOTE(review): hard-coded per-user path -- this only works on the original
# author's machine; confirm before deploying elsewhere.
Task_path = '/Users/jasonsteffener/Documents/GitHub/CognitiveTasks'
# The imports below load the config module containing the location of the
# data folder. If an ImportError occurs it means the GUI program has not
# been run yet: the GUI checks whether the config file exists and creates
# it if it does not.
print(dir_path)
sys.path.append(os.path.join(Task_path,'ConfigFiles'))
#sys.path.append(os.path.join(dir_path, '..','CognitiveTasks','ConfigFiles'))
sys.path.append(os.path.join(dir_path, 'code'))
import ProcessNeuroPsychFunctions
import ProcessBehavioralFunctions
import ScoreNeuroPsych
import NeuropsychDataFolder
# Load up the raw-data location as a module-level global.
AllInDataFolder = NeuropsychDataFolder.NeuropsychDataFolder
# Where to put the summary data: the parent folder of the input data folder.
AllOutDataFolder = os.path.join(os.path.split(AllInDataFolder)[0])
def main():
    """Score every subject/visit folder and refresh the master results CSV.

    Loads all raw behavioral data, merges it with any previously written
    results file (preserving rows a human has marked as checked), writes
    the merged results, and archives the old results file.

    Returns:
        pandas.DataFrame of the newly scored data (NOT the merged output).
    """
    # Cycle over all data folders and score whatever raw files are found.
    NewData = CycleOverDataFolders()
    # Find the name of the existing results file (or a fresh name if none).
    ExistingDataFileName = LocateOutDataFile()
    if os.path.exists(ExistingDataFileName):
        # Found an existing results file: load it so "locked down"
        # human-checked rows (AAChecked == 1) can be preserved.
        OldData = LoadOutDataFile(ExistingDataFileName)
        # Build the merged results DataFrame.
        UpdatedData = CreateUpdatedDataFrameOfResults(NewData, OldData)
        # Create a freshly timestamped output file name.
        UpdatedDataFileName = CreateOutFileName()
        # Write out the updated data and archive the old results file.
        WriteOutNewdataMoveOldData(UpdatedData, UpdatedDataFileName, ExistingDataFileName)
    else:
        # No previous results file exists: write the new data directly.
        OldData = []
        NewData.to_csv(ExistingDataFileName, index = False, float_format='%.3f')
    return NewData
def CycleOverDataFolders():
    """Walk the master data folder and score every subject/visit found.

    Each numeric subject folder (IDs starting with '9' are treated as test
    subjects and skipped) is searched for visit sub-folders whose names end
    like 'V0x'. If a subject folder has no visit folders but does contain
    files, the visit ID is derived from the data file names instead.

    Returns:
        pandas.DataFrame with one row per subject/visit; columns are the
        flattened task results plus 'AAsubid', 'AAVisid' and 'AAChecked'
        (initialized to 0, i.e. not yet human-verified).
    """
    df = pd.DataFrame()
    ListOfDict = []
    # Get all immediate sub-directories of the input data folder.
    subdirs = glob.glob(os.path.join(AllInDataFolder,'*/'))
    for subdir in subdirs:
        # Strip the trailing slash, then keep only the folder name itself.
        CurDir = os.path.split(subdir)[0]
        CurDir = os.path.split(CurDir)[-1]
        # Only process purely numeric IDs; a leading '9' marks test data.
        if CurDir.isdigit() and CurDir[0] != '9':
            # Enter the directory and find visit folders.
            VisitFolders = glob.glob(os.path.join(subdir,'*/'))
            if (len(VisitFolders) > 0):
                for visFold in VisitFolders:
                    CurVis = os.path.split(visFold)[0]
                    CurVis = os.path.split(CurVis)[-1]
                    # Visit folder names look like '...V0x' (e.g. 'V001').
                    if CurVis[-4:-2] == 'V0':
                        # From the directory structure extract the subject
                        # ID and the visit ID.
                        subid = CurDir
                        Visid = CurVis
                        print('%s, %s'%(subid, Visid))
                        # Load and score the raw data files for this visit.
                        Results = LoadRawData(os.path.join(AllInDataFolder, subid, Visid),subid)
                        # Only add results if any were found, to avoid a
                        # file with lots of empty rows.
                        if len(Results) > 0:
                            FlatResults = FlattenDict(Results)
                            # Add identifying columns; the 'AA' prefix keeps
                            # them sorted first among the CSV columns.
                            FlatResults['AAsubid'] = subid
                            FlatResults['AAVisid'] = Visid
                            FlatResults['AAChecked'] = 0
                            ListOfDict.append(FlatResults)
            elif len(os.listdir(subdir)) > 0:
                # Data sits directly in the subject folder with no visit
                # folder: derive the visit ID from the file names.
                subid = CurDir
                Visid = FindVisitIDFromFileNames(subdir)
                Results = LoadRawData(os.path.join(AllInDataFolder, subid),subid)
                FlatResults = FlattenDict(Results)
                # Add identifying columns as above.
                FlatResults['AAsubid'] = subid
                FlatResults['AAVisid'] = Visid
                FlatResults['AAChecked'] = 0
                ListOfDict.append(FlatResults)
    df = pd.DataFrame(ListOfDict)
    return df
def FindVisitIDFromFileNames(subdir):
    """Derive a visit ID from the date/time stamp in the first CSV file name.

    Only the first CSV found in *subdir* is consulted (glob order), matching
    the original loop-then-break behavior.

    Args:
        subdir: folder to search for '*.csv' files.

    Returns:
        The visit ID parsed from the first CSV file name, or None when the
        folder contains no CSV files.
        (BUG FIX: the original left 'Visid' unbound in the empty-folder
        case and raised NameError.)
    """
    ListOfFiles = glob.glob(os.path.join(subdir, '*.csv'))
    if not ListOfFiles:
        return None
    return ProcessBehavioralFunctions.ParseFileNamesForDateTime(ListOfFiles[0])
def LoadRawData(VisitFolder, subid):
    """Load and score the task data files found in one visit folder.

    Looks for very specific file-name tags (DMS, VSTM and N-Back MRI runs),
    scores each run individually and, when both runs of a task exist, the
    two runs combined.

    Args:
        VisitFolder: path to the folder holding this visit's CSV files.
        subid: subject ID string; file names start with '<subid>_<task>'.

    Returns:
        collections.OrderedDict mapping result labels (e.g. 'DMSMRI1',
        'VSTMMRIAll') to per-task result dicts. Tasks whose files are
        missing or fail to score are simply absent.

    NOTE(review): the bare 'except: pass' blocks below silently swallow
    ALL scoring errors (including typos in the called functions) -- a task
    that fails to score just disappears from the output with no message.
    NOTE(review): DataFrame.append was removed in pandas 2.0; the
    'Data1.append(Data2)' calls require pandas < 2.0.
    """
    print('working on %s'%(subid))
    Results = collections.OrderedDict()
    # --- DMS task: two MRI runs, scored separately and combined ---
    Data1 = ReadFile(VisitFolder, subid, 'DMS_Block_MRIRun1')
    if len(Data1) > 0:
        try:
            CapacityData = ReadFile(VisitFolder, subid, 'DMS_CAPACITY')
            Data1 = ProcessNeuroPsychFunctions.CheckDMSDataFrameForLoad(Data1)
            tempResults = ProcessNeuroPsychFunctions.ProcessDMSBlockv2(Data1, CapacityData)
            Results['DMSMRI1'] = ScoreNeuroPsych.Reorder_DMS_VSTM_Results(tempResults, 'DMS')
            print('\tDMS loaded')
        except:
            pass
    Data2 = ReadFile(VisitFolder, subid, 'DMS_Block_MRIRun2')
    if len(Data2) > 0:
        try:
            CapacityData = ReadFile(VisitFolder, subid, 'DMS_CAPACITY')
            Data2 = ProcessNeuroPsychFunctions.CheckDMSDataFrameForLoad(Data2)
            tempResults = ProcessNeuroPsychFunctions.ProcessDMSBlockv2(Data2, CapacityData)
            Results['DMSMRI2'] = ScoreNeuroPsych.Reorder_DMS_VSTM_Results(tempResults, 'DMS')
            print('\tDMS loaded')
        except:
            pass
    # Both DMS runs present: score the concatenated data as well.
    if len(Data1) > 0 and len(Data2) > 0:
        AllData = Data1.append(Data2)
        if len(AllData) > 0:
            try:
                CapacityData = ReadFile(VisitFolder, subid, 'DMS_CAPACITY')
                AllData = ProcessNeuroPsychFunctions.CheckDMSDataFrameForLoad(AllData)
                tempResults = ProcessNeuroPsychFunctions.ProcessDMSBlockv2(AllData, CapacityData)
                Results['DMSMRIAll'] = ScoreNeuroPsych.Reorder_DMS_VSTM_Results(tempResults, 'DMS')
                print('\tBoth DMS loaded')
            except:
                pass
    # --- VSTM task: same two-runs-plus-combined pattern ---
    VSTMData1 = ReadFile(VisitFolder, subid, 'VSTM_Block_MRIRun1')
    if len(VSTMData1) > 0:
        try:
            CapacityData = ReadFile(VisitFolder, subid, 'VSTM_CAPACITY')
            tempResults = ProcessNeuroPsychFunctions.ProcessVSTMBlockv2(VSTMData1, CapacityData)
            Results['VSTMMRI1'] = ScoreNeuroPsych.Reorder_DMS_VSTM_Results(tempResults, 'VSTM')
            print('\tVSTM loaded')
        except:
            pass
    VSTMData2 = ReadFile(VisitFolder, subid, 'VSTM_Block_MRIRun2')
    if len(VSTMData2) > 0:
        try:
            CapacityData = ReadFile(VisitFolder, subid, 'VSTM_CAPACITY')
            tempResults = ProcessNeuroPsychFunctions.ProcessVSTMBlockv2(VSTMData2, CapacityData)
            Results['VSTMMRI2'] = ScoreNeuroPsych.Reorder_DMS_VSTM_Results(tempResults, 'VSTM')
            print('\tVSTM loaded')
        except:
            pass
    if len(VSTMData1) > 0 and len(VSTMData2) > 0:
        VSTMAllData = VSTMData1.append(VSTMData2)
        if len(VSTMAllData) > 0:
            try:
                # NOTE(review): unlike the single-run branches, this reuses
                # whatever CapacityData was last read above -- confirm that
                # is intended.
                tempResults = ProcessNeuroPsychFunctions.ProcessVSTMBlockv2(VSTMAllData, CapacityData)
                Results['VSTMMRIAll'] = ScoreNeuroPsych.Reorder_DMS_VSTM_Results(tempResults, 'VSTM')
                print('\tVBoth VSTM loaded')
            except:
                pass
    # --- N-Back task: no try/except guard here, errors will propagate ---
    Data1 = ReadFile(VisitFolder, subid, 'NBack_012012_MRIRun01')
    if len(Data1) > 0:
        Results['NBackMRI1'] = ProcessNeuroPsychFunctions.ProcessNBack(Data1)
        print('\tN-Back loaded')
    Data2 = ReadFile(VisitFolder, subid, 'NBack_012012_MRIRun02')
    if len(Data2) > 0:
        Results['NBackMRI2'] = ProcessNeuroPsychFunctions.ProcessNBack(Data2)
        print('\tN-Back loaded')
    if len(Data1) > 0 and len(Data2) > 0:
        AllData = Data1.append(Data2)
        if len(AllData) > 0:
            Results['NBackMRIAll'] = ProcessNeuroPsychFunctions.ProcessNBack(AllData)
            print('\tBoth N-Back loaded')
    # Data = ReadFile(VisitFolder, subid, 'DMS_Block_MRIRun1')
    # Data = CheckDMSDataFrameForLoad(Data)
    # Results['DMSMRI1'] = ProcessDMSBlockv2(Data)
    #
    # Data = ReadFile(VisitFolder, subid, 'DMS_Block_MRIRun2')
    # Data = CheckDMSDataFrameForLoad(Data)
    # Results['DMSMRI2'] = ProcessDMSBlockv2(Data)
    #
    # Data = ReadFile(VisitFolder, subid, 'DMS_Block_BehRun1')
    # Data = CheckDMSDataFrameForLoad(Data)
    # Results['DMSBeh1'] = ProcessDMSBlockv2(Data)
    #
    return Results
def ReadFile(VisitFolder, subid, TaskTag):
    """Find and load the CSV file for one task in a visit folder.

    Searches *VisitFolder* for files named '<subid>_<TaskTag>*.csv'.
    If multiple matches exist the user is prompted to pick one; the
    unselected files are renamed with an 'XXX_' prefix so the next run
    finds only one file and does not prompt again.

    Args:
        VisitFolder: folder to search.
        subid: subject ID string used as the file-name prefix.
        TaskTag: task-specific portion of the file name.

    Returns:
        pandas.DataFrame of the file contents, or an empty list when no
        usable file was found / selected or the file is empty. (Callers
        test the result with len(), which works for both.)
    """
    Data = []
    # List all files in the visit folder.
    ll = os.listdir(VisitFolder)
    # The search string is a combination of the subject ID and task name.
    SearchString = subid + '_' + TaskTag
    matching = fnmatch.filter(ll, SearchString + '*.csv')
    if len(matching) > 1:
        # Multiple candidate files: ask the user which one to keep and
        # rename the rest out of the way.
        print('There are multiple files found for %s in folder: %s'%(SearchString, VisitFolder))
        count = 1
        for i in matching:
            # Print the name and size of each candidate.
            # BUG FIX: the original showed os.stat(...matching[0]...) for
            # every entry, i.e. the size of the FIRST file on every line;
            # it also divided by 1048 instead of 1024 for kB.
            SizeOfFile = np.round(os.stat(os.path.join(VisitFolder, i)).st_size / 1024)
            print('\t%d) %s, size = %0.0f kB'%(count, i, SizeOfFile))
            count += 1
        sel = input('Which one should be kept? (Press return to skip) ')
        if len(sel) > 0:
            SelectedFile = matching[int(sel) - 1]
            # Rename the unselected files so they are not offered next time.
            count = 1
            for i in matching:
                if not count == int(sel):
                    OutName = 'XXX_' + i
                    print(OutName)
                    shutil.move(os.path.join(VisitFolder, i), os.path.join(VisitFolder, OutName))
                count += 1
        else:
            # No file was selected: rename them all so they are not
            # viewed again.
            for i in matching:
                OutName = 'XXX_' + i
                print(OutName)
                shutil.move(os.path.join(VisitFolder, i), os.path.join(VisitFolder, OutName))
            SelectedFile = False
    elif len(matching) == 1:
        SelectedFile = matching[0]
    else:
        SelectedFile = False
        print('Did not find any files!!!')
    if SelectedFile != False:
        InputFile = os.path.join(VisitFolder, SelectedFile)
        # Guard against zero-byte files: pd.read_csv would raise on them.
        # Note: for the data to be read as a DataFrame all columns need
        # headings, otherwise pandas raises an error.
        if os.stat(InputFile).st_size == 0:
            print('File is empty')
        else:
            Data = pd.read_csv(InputFile)
    return Data
def FlattenDict(Results):
    """Collapse a two-level dict of task results into one flat OrderedDict.

    The process functions each return a dict of results per task; to write
    a single CSV row the nesting is flattened so each key becomes
    '<task>_<measure>'.

    Args:
        Results: mapping of task label -> dict of measure -> value.

    Returns:
        collections.OrderedDict keyed by '<task>_<measure>', preserving
        the original iteration order of tasks and measures.
    """
    flat = collections.OrderedDict()
    for task, measures in Results.items():
        for measure, value in measures.items():
            flat['%s_%s' % (task, measure)] = value
    return flat
def PutDataIntoOutputFile():
    """UNFINISHED stub describing the intended single-output-file design.

    Intended behavior (per the original notes): one results file with
    columns for partID, visitID (often 1,2,3), a human-only
    'data checked' flag, and a data-completeness flag. The part/visit IDs
    would be read from the folder names, then the output checked for that
    person; if found and checked, skip; if incomplete, try to fill in the
    missing data.

    NOTE(review): this function is dead code and would crash if called --
    'FlatResults' is not defined in this scope, so the line below raises
    NameError. Left as-is pending a decision on whether to finish or
    remove it.
    """
    df2 = pd.DataFrame.from_dict(FlatResults, orient='index')
    pass
def LoadOutDataFile(OutDataFilePathName):
    """Read a previously written results CSV into a pandas DataFrame.

    Args:
        OutDataFilePathName: path to the existing results CSV file.

    Returns:
        pandas.DataFrame with the file's contents.
    """
    return pd.read_csv(OutDataFilePathName)
def CreateOutFileName():
    """Build a timestamped path for a new master results CSV.

    The file name embeds the current date and time so it is clear when
    the results were generated.

    Returns:
        Full path under AllOutDataFolder, e.g.
        '<AllOutDataFolder>/NCM_Master_MRI_updated_Jan-01-2024_12-00.csv'.
    """
    BaseFileName = 'NCM_Master_MRI'
    stamp = datetime.datetime.now().strftime("_updated_%b-%d-%Y_%H-%M.csv")
    return os.path.join(AllOutDataFolder, BaseFileName + stamp)
def LocateOutDataFile():
    """Return the path of the current master results CSV.

    If one or more 'NCM_Master_MRI*.csv' files already exist in
    AllOutDataFolder, the last entry of the glob result is returned;
    otherwise a fresh timestamped file name is returned (the file itself
    is not created here).

    NOTE(review): glob order is filesystem-dependent and the timestamp
    format is not lexically sortable, so with multiple old files the one
    picked is not guaranteed to be the newest -- confirm.

    Returns:
        Full path to the results file to use.
    """
    BaseFileName = 'NCM_Master_MRI'
    # Which files already exist with this base name?
    Files = glob.glob(os.path.join(AllOutDataFolder, BaseFileName + '*.csv'))
    if Files:
        # Reuse an existing results file.
        return Files[-1]
    # None found: propose a new, timestamped name.
    stamp = datetime.datetime.now().strftime("_updated_%b-%d-%Y_%H-%M.csv")
    return os.path.join(AllOutDataFolder, BaseFileName + stamp)
def CreateUpdatedDataFrameOfResults(NewData, OldData):
    """Merge freshly scored data with a previously written results file.

    For each subject/visit row in NewData, look for the same subject/visit
    in OldData. If the old row has been verified by a human
    (AAChecked == 1) it is "locked down" and kept unchanged; otherwise the
    new row overwrites it. Rows not present in OldData are taken from
    NewData.

    Args:
        NewData: DataFrame of freshly scored results; 'AAsubid' values are
            strings read from folder names.
        OldData: DataFrame loaded from the existing results CSV; pandas
            parses its 'AAsubid' column as integers, hence the int() cast
            in the comparison below.

    Returns:
        pandas.DataFrame holding the merged rows.

    BUG FIX: the original used DataFrame.append (removed in pandas 2.0)
    and int() on a one-element Series (deprecated, removed in pandas 2.x);
    replaced with pd.concat and .iloc[0].
    """
    OutRows = []
    for index, NewRow in NewData.iterrows():
        NewDataSubId = NewRow['AAsubid']
        NewDataVisitId = NewRow['AAVisid']
        print(NewDataSubId)
        print(NewDataVisitId)
        # Look for this subject/visit in the old results file.
        OldRow = OldData.loc[(OldData['AAsubid'] == int(NewDataSubId)) & (OldData['AAVisid'] == NewDataVisitId)]
        if len(OldRow) > 0 and int(OldRow['AAChecked'].iloc[0]) == 1:
            # Row exists and has been checked by a human: keep it as-is.
            OutRows.append(OldRow)
        else:
            # Row is new, or exists but is unchecked: overwrite with new data.
            # (Transpose the Series into a one-row DataFrame for concat.)
            OutRows.append(NewRow.to_frame().T)
    # pd.concat replaces the removed DataFrame.append.
    return pd.concat(OutRows) if OutRows else pd.DataFrame()
def WriteOutNewdataMoveOldData(UpdatedData, UpdatedDataFileName, ExistingDataFileName):
    """Archive the previous results file, then write the updated results.

    The old file is moved into an 'OldResultFiles' sub-folder of
    AllOutDataFolder and prefixed with 'X_' so it cannot be confused with
    current data.

    Args:
        UpdatedData: merged results DataFrame to write out.
        UpdatedDataFileName: destination path for the new results CSV.
        ExistingDataFileName: path of the old results CSV to archive.
    """
    ArchiveFolder = os.path.join(AllOutDataFolder, 'OldResultFiles')
    # Create the archive folder on first use.
    if not os.path.exists(ArchiveFolder):
        os.mkdir(ArchiveFolder)
    # Rename while moving so the archived file is clearly not current.
    ArchivedName = 'X_' + os.path.basename(ExistingDataFileName)
    shutil.move(ExistingDataFileName, os.path.join(ArchiveFolder, ArchivedName))
    # With the old file out of the way, write the updated results.
    UpdatedData.to_csv(UpdatedDataFileName, index = False, float_format='%.3f')
#
# Script entry point: score all data folders when run directly.
if __name__ == "__main__":
    main()