Skip to content

Commit 9dd760d

Browse files
committed
updates
1 parent 18cba8a commit 9dd760d

File tree

1 file changed

+47
-33
lines changed

1 file changed

+47
-33
lines changed

migration/migration.py

Lines changed: 47 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ def read_tsv(self,project_id,node,name='temp'):
6363

6464
def write_tsv(self,df,project_id,node,name='temp'):
6565
if name is not None:
66-
outname = "{}_{}_{}.tsv".format(name,project_id,node)
66+
outname = "{0}_{1}_{2}.tsv".format(name,project_id,node)
6767
else:
68-
outname = "{}_{}.tsv".format(project_id,node)
68+
outname = "{0}_{1}.tsv".format(project_id,node)
6969
try:
7070
df.to_csv(outname, sep='\t', index=False, encoding='utf-8')
7171
print("\tTotal of {} records written to node '{}' in file:\n\t\t{}.".format(len(df),node,outname))
@@ -380,7 +380,7 @@ def batch_add_visits(self,project_id,new_dd,old_dd,links):
380380
print("\tTotal of {} missing visit links created for this batch.".format(total))
381381
return df
382382

383-
def move_props(self,project_id,from_node,to_node,props,dd,parent_node=None,required_props=None,name='temp'):
383+
def move_props(self,project_id,old_node,new_node,props,dd,parent_node=None,required_props=None,name='temp'):
384384
"""
385385
This function takes a node with props to be moved (old_node) and moves those props/data to a new node (new_node).
386386
The function also checks whether the props to be moved actually contain non-null data. If all of the data are null, no new records are created.
@@ -396,20 +396,15 @@ def move_props(self,project_id,from_node,to_node,props,dd,parent_node=None,requi
396396
"""
397397
print("\tMoving {} from '{}' to '{}'.".format(props,from_node,to_node))
398398

399-
from_name = "{}_{}_{}.tsv".format(name,project_id,from_node) #from imaging_exam
400-
try:
401-
df_from = pd.read_csv(from_name,sep='\t',header=0,dtype=str)
402-
except FileNotFoundError as e:
403-
print("\tNo '{}' TSV found with the data to be moved. Nothing to do. Finished.".format(from_node))
404-
return
399+
odf = self.read_tsv(project_id,old_node)
405400

406-
to_name = "{}_{}_{}.tsv".format(name,project_id,to_node) #to reproductive_health
407-
try:
408-
df_to = pd.read_csv(to_name,sep='\t',header=0,dtype=str)
401+
try: # if the new node TSV already exists, read it in, if not, create a new df
402+
ndf = self.read_tsv(project_id,new_node)
403+
print("\t'{}' TSV already exists with {} records.".format(new_node,len(ndf)))
409404
new_file = False
410405
except FileNotFoundError as e:
411-
df_to = pd.DataFrame(columns=['submitter_id'])
412-
print("\tNo '{}' TSV found. Creating new TSV for data to be moved.".format(to_node))
406+
ndf = pd.DataFrame(columns=['submitter_id'])
407+
print("\tNo '{}' TSV found. Creating new TSV for data to be moved.".format(new_node))
413408
new_file = True
414409

415410
# Check that the data to move is not entirely null. If it is, then give warning and quit.
@@ -510,39 +505,58 @@ def add_prop(self,project_id,node,props):
510505
return df
511506

512507

513-
def change_prop_name(self,project_id,node,props,name='temp'):
508+
def non_null_data(self,project_id,node,prop,name='temp'):
509+
""" Returns the rows of a node TSV in which the given property has non-null data.
514510
"""
515-
Changes the names of columns in a TSV.
511+
df = self.read_tsv(project_id=project_id,node=node,name=name)
512+
nn = df.loc[df[prop].notnull()]
513+
return nn
514+
515+
516+
def change_prop_name(self,project_id,node,props,name='temp',force=False):
517+
"""
518+
Changes the name of a column header in a single TSV.
519+
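Nothing is changed if the old prop name is not in the TSV, or if the new prop name already exists with non-null data; an existing new prop column that is entirely null is dropped before the rename.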
520+
516521
Args:
517522
project_id(str): The project_id of the TSVs.
518523
node(str): The name of the node TSV to change column names in.
519-
props(dict): A dict with keys of old prop names to change with values as new names.
524+
props(dict): A dict with keys of old prop names to change with values as new names. {'old_prop':'new_prop'}
520525
Example:
521526
This changes the column header "time_of_surgery" to "hour_of_surgery" in the surgery TSV.
522527
change_prop_name(project_id='P001',node='surgery',props={'time_of_surgery':'hour_of_surgery'})
523528
"""
524529

525530
print("\tAttempting to change prop names in {} node:\n\t\t{}".format(node,props))
526531
df = self.read_tsv(project_id=project_id,node=node,name=name)
527-
filename = "{0}_{1}_{2}.tsv".format(name,project_id,node)
528532

529-
prop1 = list(props)[0]
530-
prop2 = props[prop1]
533+
old_prop = list(props)[0]
534+
new_prop = props[old_prop]
531535

532-
if prop1 not in df:
533-
print("\t\tNo prop {0} found in the TSV. Nothing changed.".format(prop1))
534-
elif prop2 in df:
535-
print("\t\tprop {0} already found in the TSV with {1} non-null records.".format(prop2,len(df.loc[df[prop2].notnull()])))
536-
else:
537-
try:
538-
df.rename(columns=props,inplace = True)
539-
df = self.write_tsv(df,project_id,node,name=name)
540-
except Exception as e:
541-
print("\tCouldn't change prop names: {}".format(e))
536+
if old_prop not in df: # old property not in TSV, fail
537+
print("\t\tOld prop name '{0}' not found in the TSV. Nothing changed.".format(old_prop))
538+
return df
539+
540+
if new_prop in df:
541+
ndf = df.loc[df[new_prop].notnull()]
542+
if len(ndf) > 0:
543+
print("\t\tExisting new prop '{0}' data found in TSV: {1} non-null records! \n\n\nCheck '{}' data before using this script!!!".format(new_prop,len(ndf),props))
544+
return df
545+
else: # if all data is null, drop the column
546+
df.drop(columns=[new_prop],inplace=True)
547+
548+
try:
549+
df.rename(columns=props,inplace = True)
550+
df = self.write_tsv(df,project_id,node,name=name)
551+
ndf = df.loc[df[new_prop].notnull()]
552+
print("\t\tProp name changed from '{}' to '{}' in '{}' TSV with {} non-null records.".format(old_prop,new_prop,node,len(ndf)))
553+
554+
except Exception as e:
555+
print("\tCouldn't change prop names: {}".format(e))
542556

543557
return df
544558

545-
def drop_props(self,project_id,node,props,name='temp'):
559+
def drop_props(self,project_id,node,props,name='temp',check_null=True):
546560
"""
547561
Function drops the list of props from column headers of a node TSV.
548562
Args:
@@ -558,7 +572,7 @@ def drop_props(self,project_id,node,props,name='temp'):
558572
else:
559573
print("\tPlease provide props to drop as a list or string:\n\t{}".format(props))
560574

561-
print("\t{}:\n\t\tDropping {}.".format(node,props))
575+
print("\t{}:\n\t\tDropping props {}.".format(node,props))
562576

563577
df = self.read_tsv(project_id=project_id,node=node,name=name)
564578
filename = "{}_{}_{}.tsv".format(name,project_id,node)
@@ -569,7 +583,7 @@ def drop_props(self,project_id,node,props,name='temp'):
569583
df = df.drop(columns=[prop])
570584
dropped.append(prop)
571585
except Exception as e:
572-
print("\tCouldn't drop prop '{}' from '{}':\n\t{}".format(prop,node,e))
586+
print("\tCouldn't drop prop '{}' from '{}' TSV:\n\t\t{}".format(prop,node,e))
573587
continue
574588

575589
if len(dropped) > 0:

0 commit comments

Comments
 (0)