@@ -63,9 +63,9 @@ def read_tsv(self,project_id,node,name='temp'):
6363
6464 def write_tsv (self ,df ,project_id ,node ,name = 'temp' ):
6565 if name is not None :
66- outname = "{}_{}_{}.tsv" .format (name ,project_id ,node )
66+ outname = "{0 }_{1 }_{2 }.tsv" .format (name ,project_id ,node )
6767 else :
68- outname = "{}_{}.tsv" .format (project_id ,node )
68+ outname = "{0 }_{1 }.tsv" .format (project_id ,node )
6969 try :
7070 df .to_csv (outname , sep = '\t ' , index = False , encoding = 'utf-8' )
7171 print ("\t Total of {} records written to node '{}' in file:\n \t \t {}." .format (len (df ),node ,outname ))
@@ -380,7 +380,7 @@ def batch_add_visits(self,project_id,new_dd,old_dd,links):
380380 print ("\t Total of {} missing visit links created for this batch." .format (total ))
381381 return df
382382
383- def move_props (self ,project_id ,from_node , to_node ,props ,dd ,parent_node = None ,required_props = None ,name = 'temp' ):
383+ def move_props (self ,project_id ,old_node , new_node ,props ,dd ,parent_node = None ,required_props = None ,name = 'temp' ):
384384 """
385385 This function takes a node with props to be moved (old_node) and moves those props/data to a new node (new_node).
386386 Fxn also checks whether the data for props to be moved actually has non-null data. If all data are null, no new records are created.
@@ -396,20 +396,15 @@ def move_props(self,project_id,from_node,to_node,props,dd,parent_node=None,requi
396396 """
397397 print ("\t Moving {} from '{}' to '{}'." .format (props ,from_node ,to_node ))
398398
399- from_name = "{}_{}_{}.tsv" .format (name ,project_id ,from_node ) #from imaging_exam
400- try :
401- df_from = pd .read_csv (from_name ,sep = '\t ' ,header = 0 ,dtype = str )
402- except FileNotFoundError as e :
403- print ("\t No '{}' TSV found with the data to be moved. Nothing to do. Finished." .format (from_node ))
404- return
399+ odf = self .read_tsv (project_id ,old_node )
405400
406- to_name = "{}_{}_{}.tsv" . format ( name , project_id , to_node ) #to reproductive_health
407- try :
408- df_to = pd . read_csv ( to_name , sep = ' \t ' , header = 0 , dtype = str )
401+ try : # if the new node TSV already exists, read it in, if not, create a new df
402+ ndf = self . read_tsv ( project_id , new_node )
403+ print ( " \t '{}' TSV already exists with {} records." . format ( new_node , len ( ndf )) )
409404 new_file = False
410405 except FileNotFoundError as e :
411- df_to = pd .DataFrame (columns = ['submitter_id' ])
412- print ("\t No '{}' TSV found. Creating new TSV for data to be moved." .format (to_node ))
406+ ndf = pd .DataFrame (columns = ['submitter_id' ])
407+ print ("\t No '{}' TSV found. Creating new TSV for data to be moved." .format (new_node ))
413408 new_file = True
414409
415410 # Check that the data to move is not entirely null. If it is, then give warning and quit.
@@ -510,39 +505,58 @@ def add_prop(self,project_id,node,props):
510505 return df
511506
512507
513- def change_prop_name (self ,project_id ,node ,props ,name = 'temp' ):
def non_null_data(self, project_id, node, prop, name='temp'):
    """
    Return the records of a node TSV that have a non-null value for a property.

    Args:
        project_id(str): The project_id of the TSV.
        node(str): The name of the node TSV to read.
        prop(str): The column (property) to check for non-null values.
        name(str): Passed through to read_tsv as its name argument.
    Returns:
        The rows of the DataFrame returned by read_tsv where column `prop` is not null.
    """
    records = self.read_tsv(project_id=project_id, node=node, name=name)
    # Boolean mask: True for every row that holds a value in the requested column.
    has_value = records[prop].notnull()
    return records.loc[has_value]
514+
515+
def change_prop_name(self, project_id, node, props, name='temp', force=False):
    """
    Changes the name of a column header in a single TSV.

    The TSV is read with self.read_tsv, the column is renamed, and the result is
    written back with self.write_tsv. Before renaming, the TSV is checked for an
    existing column carrying the new name:
      - if that column holds any non-null data, a warning is printed and nothing is changed;
      - if that column is entirely null, it is dropped so the rename does not create a duplicate column.

    Args:
        project_id(str): The project_id of the TSVs.
        node(str): The name of the node TSV to change column names in.
        props(dict): A dict with keys of old prop names to change with values as new names. {'old_prop':'new_prop'}
            Only the first entry is checked against the TSV; the whole dict is handed to DataFrame.rename.
        name(str): Passed through to read_tsv / write_tsv as their name argument.
        force(bool): Not read by this method.
    Returns:
        The DataFrame read from the TSV when nothing is changed; otherwise the value
        returned by self.write_tsv for the renamed DataFrame.
    Example:
        This changes the column header "time_of_surgery" to "hour_of_surgery" in the surgery TSV.
        change_prop_name(project_id='P001',node='surgery',props={'time_of_surgery':'hour_of_surgery'})
    """
    print("\t Attempting to change prop names in {} node:\n \t \t {}".format(node, props))
    df = self.read_tsv(project_id=project_id, node=node, name=name)

    # Guard: an empty mapping has no "old" name to look up.
    if not props:
        print("\t \t No prop names provided. Nothing changed.")
        return df

    old_prop = next(iter(props))
    new_prop = props[old_prop]

    if old_prop not in df:  # old property not in TSV, nothing to rename
        print("\t \t Old prop name '{0}' not found in the TSV. Nothing changed.".format(old_prop))
        return df

    if new_prop in df:
        existing = df.loc[df[new_prop].notnull()]
        if len(existing) > 0:
            # All three fields are numbered explicitly: mixing '{0}'/'{1}' with a bare '{}'
            # makes str.format raise ValueError instead of printing this warning.
            print("\t \t Existing new prop '{0}' data found in TSV: {1} non-null records! \n \n \n Check '{2}' data before using this script!!!".format(new_prop, len(existing), props))
            return df
        # The existing new-name column is entirely null, so drop it before renaming.
        df.drop(columns=[new_prop], inplace=True)

    try:
        df.rename(columns=props, inplace=True)
        df = self.write_tsv(df, project_id, node, name=name)
        renamed = df.loc[df[new_prop].notnull()]
        print("\t \t Prop name changed from '{}' to '{}' in '{}' TSV with {} non-null records.".format(old_prop, new_prop, node, len(renamed)))
    except Exception as e:
        # Best-effort: report the failure and fall through to return whatever df holds.
        print("\t Couldn't change prop names: {}".format(e))

    return df
544558
545- def drop_props (self ,project_id ,node ,props ,name = 'temp' ):
559+ def drop_props (self ,project_id ,node ,props ,name = 'temp' , check_null = True ):
546560 """
547561 Function drops the list of props from column headers of a node TSV.
548562 Args:
@@ -558,7 +572,7 @@ def drop_props(self,project_id,node,props,name='temp'):
558572 else :
559573 print ("\t Please provide props to drop as a list or string:\n \t {}" .format (props ))
560574
561- print ("\t {}:\n \t \t Dropping {}." .format (node ,props ))
575+ print ("\t {}:\n \t \t Dropping props {}." .format (node ,props ))
562576
563577 df = self .read_tsv (project_id = project_id ,node = node ,name = name )
564578 filename = "{}_{}_{}.tsv" .format (name ,project_id ,node )
@@ -569,7 +583,7 @@ def drop_props(self,project_id,node,props,name='temp'):
569583 df = df .drop (columns = [prop ])
570584 dropped .append (prop )
571585 except Exception as e :
572- print ("\t Couldn't drop prop '{}' from '{}':\n \t {}" .format (prop ,node ,e ))
586+ print ("\t Couldn't drop prop '{}' from '{}' TSV :\n \t \t {}" .format (prop ,node ,e ))
573587 continue
574588
575589 if len (dropped ) > 0 :
0 commit comments