@@ -68,7 +68,7 @@ def write_tsv(self,df,project_id,node,name='temp'):
6868 df .to_csv (outname , sep = '\t ' , index = False , encoding = 'utf-8' )
6969 print ("\t Total of {} records written to node '{}' in file:\n \t \t {}." .format (len (df ),node ,outname ))
7070 except Exception as e :
71- print ("Error writing TSV file: {}" .format (e ))
71+ print ("\t Error writing TSV file: {}" .format (e ))
7272 return df
7373
7474 def make_temp_files (self ,prefix ,suffix ,name = 'temp' ,overwrite = True ,nodes = ['all' ]):
@@ -121,7 +121,7 @@ def merge_nodes(self,project_id,in_nodes,out_node,name='temp'):
121121 in_nodes(list): List of node TSVs to merge into a single TSV.
122122 out_node(str): The name of the new merged TSV.
123123 """
124- print ("Merging nodes {} to '{}'." .format (in_nodes ,out_node ))
124+ print ("\t Merging nodes {} to '{}'." .format (in_nodes ,out_node ))
125125 dfs = []
126126 for node in in_nodes :
127127 filename = "{}_{}_{}.tsv" .format (name ,project_id , node )
@@ -166,11 +166,11 @@ def merge_properties(self,project_id,node,properties,name='temp'):
166166 df_merged = pd .concat ([df_rest ,df_old ],ignore_index = True ,sort = False )
167167 df = df_merged .drop (columns = [old_prop ])
168168 dropped .append (old_prop )
169- print ("Property '{}' merged into '{}' and dropped from '{}' TSV." .format (old_prop ,prop ,node ))
169+ print ("\t Property '{}' merged into '{}' and dropped from '{}' TSV." .format (old_prop ,prop ,node ))
170170 else :
171- print ("Property '{}' not found in '{}' TSV. Skipping..." .format (old_prop ,node ))
171+ print ("\t Property '{}' not found in '{}' TSV. Skipping..." .format (old_prop ,node ))
172172 if len (dropped ) > 0 :
173- print ("Properties {} merged into {}." .format (dropped ,list (properties .keys ())))
173+ print ("\t Properties {} merged into {}." .format (dropped ,list (properties .keys ())))
174174 df = self .write_tsv (df ,project_id ,node )
175175 return df
176176 else :
@@ -218,7 +218,7 @@ def create_missing_links(self,project_id,node,link,old_parent,properties,new_dd,
218218 create_missing_links(node='imaging_exam',link='visit',old_parent='cases',properties={'visit_label':'Imaging','visit_method':'In-person Visit'},new_dd=dd,old_dd=prod_dd,links=None)
219219 create_missing_links(node='diagnosis',link='visit',old_parent='cases',properties={'visit_label':'Unknown','visit_method':'Unknown'},new_dd=dd,old_dd=prod_dd)
220220 """
221- print ("Creating missing '{}' records with links to '{}' for '{}'." .format (link ,old_parent ,node ))
221+ print ("\t Creating missing '{}' records with links to '{}' for '{}'." .format (link ,old_parent ,node ))
222222
223223 df = self .read_tsv (project_id = project_id ,node = node ,name = name )
224224 # filename = "{}_{}_{}.tsv".format(name,project_id,node)
@@ -341,7 +341,7 @@ def batch_add_visits(self,project_id,new_dd,old_dd,links):
341341 print ("\t No links to 'case' found in the '{}' TSV." .format (node ))
342342 if len (dfs ) > 0 :
343343 df = pd .concat (dfs ,ignore_index = True ,sort = False )
344- print ("Total of {} missing visit links created for this batch." .format (total ))
344+ print ("\t Total of {} missing visit links created for this batch." .format (total ))
345345 return df
346346
347347 def move_properties (self ,project_id ,from_node ,to_node ,properties ,dd ,parent_node = None ,required_props = None ,name = 'temp' ):
@@ -358,7 +358,7 @@ def move_properties(self,project_id,from_node,to_node,properties,dd,parent_node=
358358 This moves the property 'military_status' from 'demographic' node to 'military_history' node, which should link to the same parent node 'case'.
359359 move_properties(from_node='demographic',to_node='military_history',properties=['military_status'],parent_node='case')
360360 """
361- print ("Moving {} from '{}' to '{}'." .format (properties ,from_node ,to_node ))
361+ print ("\t Moving {} from '{}' to '{}'." .format (properties ,from_node ,to_node ))
362362
363363 from_name = "{}_{}_{}.tsv" .format (name ,project_id ,from_node ) #from imaging_exam
364364 try :
@@ -423,10 +423,10 @@ def move_properties(self,project_id,from_node,to_node,properties,dd,parent_node=
423423 if len (vals ) == 1 :
424424 case_data .loc [case_data ['submitter_id' ]== case_id ,header ] = vals
425425 elif len (vals ) > 1 :
426- print ("{}: {}" .format (header ,vals ))
426+ print ("\t {}: {}" .format (header ,vals ))
427427 if header == 'age_at_enrollment' : # special case hard-coded for BRAIN Commons migration
428428 lowest_val = min (vals , key = float )
429- print ("Selecting lowest value '{}' from {}." .format (lowest_val ,vals ))
429+ print ("\t Selecting lowest value '{}' from {}." .format (lowest_val ,vals ))
430430 case_data .loc [case_data ['submitter_id' ]== case_id ,header ] = lowest_val
431431 count += 1
432432 all_to = pd .merge (df_to ,case_data ,on = 'submitter_id' , how = 'left' )
@@ -450,10 +450,10 @@ def move_properties(self,project_id,from_node,to_node,properties,dd,parent_node=
450450 for prop in to_required :
451451 if prop in list (required_props .keys ()):
452452 all_to [prop ] = required_props [prop ]
453- print ("Missing required property '{}' added to new '{}' TSV with all {} values." .format (prop ,to_node ,required_props [prop ]))
453+ print ("\t Missing required property '{}' added to new '{}' TSV with all {} values." .format (prop ,to_node ,required_props [prop ]))
454454 else :
455455 all_to [prop ] = np .nan
456- print ("Missing required property '{}' added to new '{}' TSV with all null values." .format (prop ,to_node ))
456+ print ("\t Missing required property '{}' added to new '{}' TSV with all null values." .format (prop ,to_node ))
457457
458458 all_to .to_csv (to_name ,sep = '\t ' ,index = False ,encoding = 'utf-8' )
459459 print ("\t Properties moved to '{}' node from '{}'. Data saved in file:\n \t {}" .format (to_node ,from_node ,to_name ))
@@ -468,7 +468,7 @@ def add_property(self,project_id,node,properties):
468468 if prop not in list (df ):
469469 df [prop ] = properties [prop ]
470470 else :
471- print ("Property '{}' already in the TSV for node '{}'." .format (prop ,node ))
471+ print ("\t Property '{}' already in the TSV for node '{}'." .format (prop ,node ))
472472
473473 df .to_csv (filename ,sep = '\t ' ,index = False ,encoding = 'utf-8' )
474474 return df
@@ -667,7 +667,7 @@ def merge_links(self,project_id,node,link,links_to_merge,name='temp'):
667667 sid = "{}.submitter_id" .format (sublink )
668668 df .loc [df [link_name ].isnull (), link_name ] = df [sid ]
669669 df .to_csv (filename ,sep = '\t ' ,index = False ,encoding = 'utf-8' )
670- print ("Links merged to '{}' and data written to TSV file: \n \t {}" .format (link ,filename ))
670+ print ("\t Links merged to '{}' and data written to TSV file: \n \t \t {}" .format (link ,filename ))
671671 return df
672672
673673 def drop_ids (self ,project_id ,node ,name = 'temp' ):
@@ -702,7 +702,7 @@ def batch_drop_ids(self,project_id,suborder,name='temp'):
702702 for node_order in suborder :
703703
704704 node = node_order [0 ]
705- print (node )
705+ print (" \t {}:" . format ( node ) )
706706
707707 df = self .read_tsv (project_id = project_id ,node = node ,name = name )
708708 # filename = "{}_{}_{}.tsv".format(name,project_id,node)
@@ -733,7 +733,7 @@ def create_project(self,program,project):
733733 }}""" .format (program ,program )
734734 prog_json = json .loads (prog_txt )
735735 data = self .sub .create_program (json = prog_json )
736- print (data )
736+ print (" \t {}" . format ( data ) )
737737 proj_txt = """{{
738738 "type": "project",
739739 "code": "{}",
@@ -742,7 +742,7 @@ def create_project(self,program,project):
742742 }}""" .format (project ,project ,project )
743743 proj_json = json .loads (proj_txt )
744744 data = self .sub .create_project (program = program ,json = proj_json )
745- print (data )
745+ print (" \t {}" . format ( data ) )
746746
747747 def remove_special_chars (self ,project_id ,node ,name = 'temp' ):
748748 """ Replace a special character in 'Parkinson's Disease'
@@ -758,10 +758,10 @@ def remove_special_chars(self,project_id,node,name='temp'):
758758 df_txt2 = re .sub (substring ,"Parkinson's Disease" ,df_txt )
759759 df = pd .read_csv (StringIO (df_txt2 ),sep = '\t ' ,dtype = str ) # this converts int to float (adds .0 to int)
760760 df .to_csv (filename ,sep = '\t ' ,index = False , encoding = 'utf-8' )
761- print ("Special chars removed from: {}" .format (filename ))
761+ print ("\t Special chars removed from: {}" .format (filename ))
762762
763763 else :
764- print ("No special chars found in {}" .format (filename ))
764+ print ("\t No special chars found in {}" .format (filename ))
765765
766766 return df
767767
@@ -773,7 +773,7 @@ def floats_to_integers(self,project_id,node,prop,name='temp'):
773773
774774 df [prop ] = df [prop ].str .extract (r'^(\d+).0$' , expand = True )
775775 df .to_csv (filename ,sep = '\t ' ,index = False , encoding = 'utf-8' )
776- print ("Trailing '.0' decimals removed from: {}" .format (filename ))
776+ print ("\t Trailing '.0' decimals removed from: {}" .format (filename ))
777777 return df
778778
779779 def get_submission_order (self ,dd ,project_id ,name = 'temp' ,suffix = 'tsv' ,missing_nodes = ['project' ,'study' ,'case' ,'visit' ]):
@@ -798,7 +798,7 @@ def get_submission_order(self,dd,project_id,name='temp',suffix='tsv',missing_nod
798798 else :
799799 print ("\t The node '{}' is not in the data dictionary! Skipping..." .format (node ))
800800
801- print ("\t Found the following nodes:\n \t {}" .format (all_nodes ))
801+ print ("\t Found the following nodes:\n \t \t {}" .format (all_nodes ))
802802
803803 # Check for the common missing root nodes
804804 for missing_node in missing_nodes :
@@ -809,7 +809,7 @@ def get_submission_order(self,dd,project_id,name='temp',suffix='tsv',missing_nod
809809 while len (all_nodes ) > 0 :
810810
811811 node = all_nodes .pop (0 )
812- print ("\t Determining order for node '{}'." .format (node ))
812+ # print("\tDetermining order for node '{}'.".format(node)) # for trouble-shooting
813813
814814 node_links = dd [node ]['links' ]
815815 for link in node_links :
@@ -840,7 +840,7 @@ def get_submission_order(self,dd,project_id,name='temp',suffix='tsv',missing_nod
840840 else : #skip it for now
841841 all_nodes .append (node )
842842 else :
843- print ("No link target_type found for node '{}'" .format (node ))
843+ print ("\t No link target_type found for node '{}'" .format (node ))
844844 #suborder = sorted(suborder.items(), key=operator.itemgetter(1))
845845 suborder = {key :val for key , val in suborder .items () if val > 0 }
846846 print ("\t Submission Order: \n \t \t {}" .format (suborder ))
@@ -873,32 +873,34 @@ def submit_tsvs(self,project_id,suborder,check_done=False,name='temp'):
873873 data = self .sub .submit_file (project_id = project_id ,filename = filename ,chunk_size = 1000 )
874874 #print("data: {}".format(data)) #for trouble-shooting
875875 logfile .write (filename + '\n ' + json .dumps (data )+ '\n \n ' ) #put in log file
876+
876877 if len (data ['invalid' ]) == 0 and len (data ['succeeded' ]) > 0 :
877- cmd = ['mv' ,filename ,'done' ]
878+ mv_done_cmd = ['mv' ,filename ,'done' ]
878879 try :
879- output = subprocess .check_output (cmd , stderr = subprocess .STDOUT ).decode ('UTF-8' )
880- print ("Submission successful. Moving file to done:\n \t {}\n \n " .format (filename ))
880+ output = subprocess .check_output (mv_done_cmd , stderr = subprocess .STDOUT ).decode ('UTF-8' )
881+ print ("Submission successful. Moving file to done:\n \t \t {}\n \n " .format (filename ))
881882 except Exception as e :
882883 output = e .output .decode ('UTF-8' )
883884 print ("ERROR:" + output )
884885 else :
885886 if len (data ['invalid' ])> 0 :
886887 invalid_records = list (data ['invalid' ].keys ())[0 :10 ]
887888 for i in invalid_records :
888- print (data ['invalid' ][i ])
889- print ("Need to fix errors in {}" .format (filename ))
890- cmd = ['mv' ,filename ,'failed' ]
889+ print ("{}" .format (data ['invalid' ][i ]))
890+ print ("Need to fix {} errors in '{}'" .format (len (invalid_records ),filename ))
891+
892+ mv_failed_cmd = ['mv' ,filename ,'failed' ]
891893 try :
892- output = subprocess .check_output (cmd , stderr = subprocess .STDOUT ).decode ('UTF-8' )
893- print ("Submission successful . Moving file to done :\n \t {} \n \n " .format (filename ))
894+ output = subprocess .check_output (mv_failed_cmd , stderr = subprocess .STDOUT ).decode ('UTF-8' )
895+ print ("Submission failed . Moving file to failed :\n \t \t {} " .format (filename ))
894896 except Exception as e :
895897 output = e .output .decode ('UTF-8' )
896898 print ("ERROR:" + output )
897899
898900 except Exception as e :
899- print (e )
901+ print (" \t {}" . format ( e ) )
900902 else :
901- print ("\n Previously submitted file already exists in done directory:\n \t {}\n " .format (done_file ))
903+ print ("\t Previously submitted file already exists in done directory:\n \t \t {}\n " .format (done_file ))
902904
903905 def check_migration_counts (self , projects = None , overwrite = False ):
904906 """ Gets counts and downloads TSVs for all nodes for every project.
@@ -925,11 +927,12 @@ def check_migration_counts(self, projects=None, overwrite=False):
925927 query_txt = """{_%s_count (project_id:"%s")}""" % (node ,project_id )
926928 res = self .sub .query (query_txt )
927929 count = res ['data' ][str ('_' + node + '_count' )]
928- print (str (count ) + ' records found in node ' + node + ' in project ' + project_id )
930+ print ("\t {} records found in node '{}' in project '{}'." .format (str (count ),node ,project_id ))
931+
929932 if count > 0 :
930933 filename = str (mydir + '/' + project_id + '_' + node + '.tsv' )
931934 if (os .path .isfile (filename )) and (overwrite is False ):
932- print ('Previously downloaded ' + filename )
935+ print ('\t Previously downloaded ' + filename )
933936 else :
934937 prog ,proj = project_id .split ('-' ,1 )
935938 self .sub .export_node (prog ,proj ,node ,'tsv' ,filename )
0 commit comments