From 4e00b7790444dc8d3de240c50a449b2592fd9e50 Mon Sep 17 00:00:00 2001
From: Avik Datta
Date: Wed, 25 Apr 2018 10:53:59 +0100
Subject: [PATCH] removed python wrapper scripts

---
 .../db_scripts/clean_and_rebuild_database.py   | 23 ------
 .../db_scripts/load_flowcell_rules_data.py     | 29 --------
 scripts/db_scripts/load_pipeline_data.py       | 33 ---------
 scripts/db_scripts/load_platform_data.py       | 30 --------
 scripts/db_scripts/load_seqrun_data.py         | 25 -------
 .../calculate_disk_usage_summary.py            | 37 ----------
 .../calculate_sub_directory_usage.py           | 41 -----------
 .../disk_usage/merge_disk_usage_summary.py     | 41 -----------
 .../find_and_register_project_metdata.py       | 44 -----------
 .../find_new_seqrun_and_prepare_md5.py         | 73 -------------------
 10 files changed, 376 deletions(-)
 delete mode 100644 scripts/db_scripts/clean_and_rebuild_database.py
 delete mode 100644 scripts/db_scripts/load_flowcell_rules_data.py
 delete mode 100644 scripts/db_scripts/load_pipeline_data.py
 delete mode 100644 scripts/db_scripts/load_platform_data.py
 delete mode 100644 scripts/db_scripts/load_seqrun_data.py
 delete mode 100644 scripts/disk_usage/calculate_disk_usage_summary.py
 delete mode 100644 scripts/disk_usage/calculate_sub_directory_usage.py
 delete mode 100644 scripts/disk_usage/merge_disk_usage_summary.py
 delete mode 100644 scripts/seqrun_processing/find_and_register_project_metdata.py
 delete mode 100644 scripts/seqrun_processing/find_new_seqrun_and_prepare_md5.py

diff --git a/scripts/db_scripts/clean_and_rebuild_database.py b/scripts/db_scripts/clean_and_rebuild_database.py
deleted file mode 100644
index c5d9503..0000000
--- a/scripts/db_scripts/clean_and_rebuild_database.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import argparse, json
-from igf_data.task_tracking.igf_slack import IGF_slack
-from igf_data.utils.dbutils import clean_and_rebuild_database
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-d','--dbconfig_path', required=True, help='Database configuration json file')
-parser.add_argument('-s','--slack_config', required=True, help='Slack configuration json file')
-args=parser.parse_args()
-
-dbconfig_path=args.dbconfig_path
-slack_config=args.slack_config
-
-slack_obj=IGF_slack(slack_config=slack_config)
-
-try:
-  clean_and_rebuild_database(dbconfig=dbconfig_path)
-  slack_obj.post_message_to_channel(message='All old data removed from database and new tables are created',reaction='pass')
-except Exception as e:
-  message='Failed to remove old data and create new tables, error: {0}'.format(e)
-  slack_obj.post_message_to_channel(message,reaction='fail')
-  raise
-
-
diff --git a/scripts/db_scripts/load_flowcell_rules_data.py b/scripts/db_scripts/load_flowcell_rules_data.py
deleted file mode 100644
index a4dc4a2..0000000
--- a/scripts/db_scripts/load_flowcell_rules_data.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import argparse
-from igf_data.task_tracking.igf_slack import IGF_slack
-from igf_data.utils.platformutils import load_new_flowcell_data
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-f','--flowcell_data', required=True, help='Flowcell rules data json file')
-parser.add_argument('-u','--update', default=False, action='store_true', help='Update existing flowcell rules data, default: False')
-parser.add_argument('-d','--dbconfig_path', required=True, help='Database configuration json file')
-parser.add_argument('-s','--slack_config', required=True, help='Slack configuration json file')
-args=parser.parse_args()
-
-dbconfig_path=args.dbconfig_path
-slack_config=args.slack_config
-flowcell_data=args.flowcell_data
-update_data=args.update
-
-slack_obj=IGF_slack(slack_config=slack_config)
-
-try:
-  if update_data:
-    raise NotImplementedError('methods not available for updating existing data')
-  else:
-    load_new_flowcell_data(data_file=flowcell_data, dbconfig=dbconfig_path)
-except Exception as e:
-  message='Failed to load data to flowcell rules table, error: {0}'.format(e)
-  slack_obj.post_message_to_channel(message,reaction='fail')
-  raise
-else:
-  slack_obj.post_message_to_channel(message='Loaded new flowcell rules info to db',reaction='pass')
\ No newline at end of file
diff --git a/scripts/db_scripts/load_pipeline_data.py b/scripts/db_scripts/load_pipeline_data.py
deleted file mode 100644
index 5bbdd06..0000000
--- a/scripts/db_scripts/load_pipeline_data.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import argparse
-from igf_data.task_tracking.igf_slack import IGF_slack
-from igf_data.utils.pipelineutils import load_new_pipeline_data
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-p','--pipeline_data', required=True, help='Pipeline data json file')
-parser.add_argument('-u','--update', default=False, action='store_true', help='Update existing pipeline data, default: False')
-parser.add_argument('-d','--dbconfig_path', required=True, help='Database configuration json file')
-parser.add_argument('-s','--slack_config', required=True, help='Slack configuration json file')
-args=parser.parse_args()
-
-
-dbconfig_path=args.dbconfig_path
-slack_config=args.slack_config
-pipeline_data=args.pipeline_data
-update_data=args.update
-
-slack_obj=IGF_slack(slack_config=slack_config)
-
-try:
-  if update_data:
-    raise NotImplementedError('methods not available for updating existing data')
-  else:
-    load_new_pipeline_data(data_file=pipeline_data, dbconfig=dbconfig_path)
-except Exception as e:
-  message='Failed to load data to pipeline table, error: {0}'.format(e)
-  slack_obj.post_message_to_channel(message,reaction='fail')
-  raise
-else:
-  slack_obj.post_message_to_channel(message='Loaded new pipeline info to db',reaction='pass')
-
-
-
diff --git a/scripts/db_scripts/load_platform_data.py b/scripts/db_scripts/load_platform_data.py
deleted file mode 100644
index 3d87114..0000000
--- a/scripts/db_scripts/load_platform_data.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import argparse
-from igf_data.task_tracking.igf_slack import IGF_slack
-from igf_data.utils.platformutils import load_new_platform_data
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-p','--platform_data', required=True, help='Platform data json file')
-parser.add_argument('-u','--update', default=False, action='store_true', help='Update existing platform data, default: False')
-parser.add_argument('-d','--dbconfig_path', required=True, help='Database configuration json file')
-parser.add_argument('-s','--slack_config', required=True, help='Slack configuration json file')
-args=parser.parse_args()
-
-
-dbconfig_path=args.dbconfig_path
-slack_config=args.slack_config
-platform_data=args.platform_data
-update_data=args.update
-
-slack_obj=IGF_slack(slack_config=slack_config)
-
-try:
-  if update_data:
-    raise NotImplementedError('methods not available for updating existing data')
-  else:
-    load_new_platform_data(data_file=platform_data, dbconfig=dbconfig_path)
-except Exception as e:
-  message='Failed to load data to platform table, error: {0}'.format(e)
-  slack_obj.post_message_to_channel(message,reaction='fail')
-  raise
-else:
-  slack_obj.post_message_to_channel(message='Loaded new platform info to db',reaction='pass')
diff --git a/scripts/db_scripts/load_seqrun_data.py b/scripts/db_scripts/load_seqrun_data.py
deleted file mode 100644
index b0aabb5..0000000
--- a/scripts/db_scripts/load_seqrun_data.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import argparse
-from igf_data.task_tracking.igf_slack import IGF_slack
-from igf_data.utils.seqrunutils import load_new_seqrun_data
-
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-p','--seqrun_data', required=True, help='Seqrun data json file')
-parser.add_argument('-d','--dbconfig_path', required=True, help='Database configuration json file')
-parser.add_argument('-s','--slack_config', required=True, help='Slack configuration json file')
-args=parser.parse_args()
-
-dbconfig_path=args.dbconfig_path
-slack_config=args.slack_config
-seqrun_data=args.seqrun_data
-
-slack_obj=IGF_slack(slack_config=slack_config)
-
-try:
-  load_new_seqrun_data(data_file=seqrun_data, dbconfig=dbconfig_path)
-except Exception as e:
-  message='Failed to load data to seqrun table, error: {0}'.format(e)
-  slack_obj.post_message_to_channel(message,reaction='fail')
-  raise
-else:
-  slack_obj.post_message_to_channel(message='Loaded new seqrun info to db',reaction='pass')
\ No newline at end of file
diff --git a/scripts/disk_usage/calculate_disk_usage_summary.py b/scripts/disk_usage/calculate_disk_usage_summary.py
deleted file mode 100644
index 20b205b..0000000
--- a/scripts/disk_usage/calculate_disk_usage_summary.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import argparse, shutil, json, os
-from igf_data.utils.disk_usage_utils import get_storage_stats_in_gb
-from igf_data.utils.fileutils import copy_remote_file, get_temp_dir, remove_dir
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-p','--disk_path', action='append', required=True, help='List of disk paths for summary calculation')
-parser.add_argument('-c','--copy_to_remoter', default=False, action='store_true', help='Toggle file copy to remote server')
-parser.add_argument('-r','--remote_server', required=False, help='Remote server address')
-parser.add_argument('-o','--output_path', required=True, help='Output directory path')
-args=parser.parse_args()
-
-disk_path=args.disk_path
-copy_to_remoter=args.copy_to_remoter
-remote_server=args.remote_server
-output_path=args.output_path
-
-try:
-  if copy_to_remoter and not remote_server:
-    parser.print_help()
-    raise ValueError('Remote server address is required for copying files.')
-
-  storage_stats=get_storage_stats_in_gb(disk_path) # calculate disk usage stats
-  temp_dir=get_temp_dir()
-  temp_file=os.path.join(temp_dir,'disk_usage.json') # get temp file path
-  with open(temp_file, 'w') as j_data:
-    json.dump(storage_stats,j_data,indent=4) # write disk usage to temp json file
-
-  if copy_to_remoter:
-    copy_remote_file(source_path=temp_file,
-                     destinationa_path=output_path,
-                     destination_address=remote_server) # copy json file to remote server
-  else:
-    shutil.copy2(temp_file, output_path) # copy json file to local server
-
-  remove_dir(temp_dir) # remove temp dir
-except Exception as e:
-  print('Error: {0}'.format(e))
\ No newline at end of file
diff --git a/scripts/disk_usage/calculate_sub_directory_usage.py b/scripts/disk_usage/calculate_sub_directory_usage.py
deleted file mode 100644
index fdd6fcc..0000000
--- a/scripts/disk_usage/calculate_sub_directory_usage.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import argparse, shutil, json, os
-from igf_data.utils.gviz_utils import convert_to_gviz_json_for_display
-from igf_data.utils.disk_usage_utils import get_sub_directory_size_in_gb
-from igf_data.utils.fileutils import copy_remote_file, get_temp_dir, remove_dir
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-p','--directory_path', required=True, help='A directory path for sub directory lookup')
-parser.add_argument('-c','--copy_to_remoter', default=False, action='store_true', help='Toggle file copy to remote server')
-parser.add_argument('-r','--remote_server', required=False, help='Remote server address')
-parser.add_argument('-o','--output_filepath', required=True, help='Output gviz file path')
-args=parser.parse_args()
-
-directory_path=args.directory_path
-copy_to_remoter=args.copy_to_remoter
-remote_server=args.remote_server
-output_filepath=args.output_filepath
-
-try:
-  if copy_to_remoter and not remote_server:
-    parser.print_help()
-    raise ValueError('Remote server address is required for copying files.')
-
-  temp_dir=get_temp_dir()
-  temp_file=os.path.join(temp_dir,'subdirectory_usage.json') # get temp file path
-  storage_stats, description, column_order=\
-    get_sub_directory_size_in_gb(input_path=directory_path) # calculate sub directory usage stats
-  convert_to_gviz_json_for_display(description=description,
-                                   data=storage_stats,
-                                   columns_order=column_order,
-                                   output_file=temp_file) # write temp gviz json file
-
-  if copy_to_remoter:
-    copy_remote_file(source_path=temp_file,
-                     destinationa_path=output_filepath,
-                     destination_address=remote_server) # copy json file to remote server
-  else:
-    shutil.copy2(temp_file, output_filepath) # copy json file to local server
-
-  remove_dir(temp_dir) # remove temp dir
-except Exception as e:
-  print('Error: {0}'.format(e))
\ No newline at end of file
diff --git a/scripts/disk_usage/merge_disk_usage_summary.py b/scripts/disk_usage/merge_disk_usage_summary.py
deleted file mode 100644
index 468d93a..0000000
--- a/scripts/disk_usage/merge_disk_usage_summary.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import argparse, shutil, json, os
-from igf_data.utils.gviz_utils import convert_to_gviz_json_for_display
-from igf_data.utils.disk_usage_utils import merge_storage_stats_json
-from igf_data.utils.fileutils import copy_remote_file, get_temp_dir, remove_dir
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-f','--config_file', required=True, help='A configuration json file for disk usage summary')
-parser.add_argument('-l','--label_file', default=None, help='A json file for disk label name')
-parser.add_argument('-c','--copy_to_remoter', default=False, action='store_true', help='Toggle file copy to remote server')
-parser.add_argument('-r','--remote_server', required=False, help='Remote server address')
-parser.add_argument('-o','--output_filepath', required=True, help='Output gviz file path')
-args=parser.parse_args()
-
-config_file=args.config_file
-label_file=args.label_file
-copy_to_remoter=args.copy_to_remoter
-remote_server=args.remote_server
-output_filepath=args.output_filepath
-
-try:
-  if copy_to_remoter and not remote_server:
-    parser.print_help()
-    raise ValueError('Remote server address is required for copying files.')
-
-  temp_dir=get_temp_dir()
-  temp_file=os.path.join(temp_dir,'merged_summary_usage.json') # get temp file path
-  data,description,column_order=merge_storage_stats_json(config_file,label_file) # get merged summary
-  convert_to_gviz_json_for_display(description=description,
-                                   data=data,
-                                   columns_order=column_order,
-                                   output_file=temp_file) # write temp gviz json file
-  if copy_to_remoter:
-    copy_remote_file(source_path=temp_file,
-                     destinationa_path=output_filepath,
-                     destination_address=remote_server) # copy json file to remote server
-  else:
-    shutil.copy2(temp_file, output_filepath) # copy json file to local server
-
-  remove_dir(temp_dir) # remove temp dir
-except Exception as e:
-  print('Error: {0}'.format(e))
\ No newline at end of file
diff --git a/scripts/seqrun_processing/find_and_register_project_metdata.py b/scripts/seqrun_processing/find_and_register_project_metdata.py
deleted file mode 100644
index ae707c8..0000000
--- a/scripts/seqrun_processing/find_and_register_project_metdata.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import argparse
-from igf_data.process.seqrun_processing.find_and_register_new_project_data import Find_and_register_new_project_data
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-p','--projet_info_path', required=True, help='Project metadata directory path')
-parser.add_argument('-d','--dbconfig', required=True, help='Database configuration file path')
-parser.add_argument('-t','--user_account_template', required=True, help='User account information email template file path')
-parser.add_argument('-s','--log_slack', default=False, action='store_true', help='Toggle Slack logging')
-parser.add_argument('-n','--slack_config', required=True, help='Slack configuration file path')
-parser.add_argument('-c','--check_hpc_user', default=False, action='store_true', help='Toggle HPC user checking')
-parser.add_argument('-u','--hpc_user', required=True, help='HPC user name for LDAP server checking')
-parser.add_argument('-a','--hpc_address', required=True, help='HPC address for LDAP server checking')
-parser.add_argument('-l','--ldap_server', required=True, help='LDAP server address')
-parser.add_argument('-i','--setup_irods', default=False, action='store_true', help='Setup iRODS account for user')
-parser.add_argument('-m','--notify_user', default=False, action='store_true', help='Notify user about new account and password')
-args=parser.parse_args()
-
-projet_info_path=args.projet_info_path
-dbconfig=args.dbconfig
-user_account_template=args.user_account_template
-log_slack=args.log_slack
-slack_config=args.slack_config
-check_hpc_user=args.check_hpc_user
-hpc_user=args.hpc_user
-hpc_address=args.hpc_address
-ldap_server=args.ldap_server
-setup_irods=args.setup_irods
-notify_user=args.notify_user
-
-try:
-  fa=Find_and_register_new_project_data(projet_info_path=projet_info_path,\
-                                        dbconfig=dbconfig,\
-                                        user_account_template=user_account_template,\
-                                        log_slack=log_slack,\
-                                        slack_config=slack_config,\
-                                        check_hpc_user=check_hpc_user,\
-                                        hpc_user=hpc_user,\
-                                        hpc_address=hpc_address,\
-                                        ldap_server=ldap_server,\
-                                        setup_irods=setup_irods,\
-                                        notify_user=notify_user)
-  fa.process_project_data_and_account()
-except Exception as e:
-  print('ERROR: {0}'.format(e))
\ No newline at end of file
diff --git a/scripts/seqrun_processing/find_new_seqrun_and_prepare_md5.py b/scripts/seqrun_processing/find_new_seqrun_and_prepare_md5.py
deleted file mode 100644
index 4e6fe4b..0000000
--- a/scripts/seqrun_processing/find_new_seqrun_and_prepare_md5.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import argparse
-from igf_data.task_tracking.igf_slack import IGF_slack
-from igf_data.task_tracking.igf_asana import IGF_asana
-from igf_data.utils.fileutils import get_temp_dir,remove_dir
-from igf_data.process.seqrun_processing.find_and_process_new_seqrun import find_new_seqrun_dir, calculate_file_md5, load_seqrun_files_to_db, seed_pipeline_table_for_new_seqrun,check_for_registered_project_and_sample,validate_samplesheet_for_seqrun
-
-parser=argparse.ArgumentParser()
-parser.add_argument('-p','--seqrun_path', required=True, help='Seqrun directory path')
-parser.add_argument('-m','--md5_path', required=True, help='Seqrun md5 output dir')
-parser.add_argument('-d','--dbconfig_path', required=True, help='Database configuration json file')
-parser.add_argument('-s','--slack_config', required=True, help='Slack configuration json file')
-parser.add_argument('-a','--asana_config', required=True, help='Asana configuration json file')
-parser.add_argument('-i','--asana_project_id', required=True, help='Asana project id')
-parser.add_argument('-n','--pipeline_name', required=True, help='IGF pipeline name')
-parser.add_argument('-j','--samplesheet_json_schema', required=True, help='JSON schema for samplesheet validation')
-parser.add_argument('-e','--exclude_path', action='append', default=[], help='List of sub directories excluded from the search')
-args=parser.parse_args()
-
-seqrun_path=args.seqrun_path
-md5_path=args.md5_path
-dbconfig_path=args.dbconfig_path
-slack_config=args.slack_config
-asana_config=args.asana_config
-asana_project_id=args.asana_project_id
-pipeline_name=args.pipeline_name
-exclude_path=args.exclude_path
-samplesheet_json_schema=args.samplesheet_json_schema
-
-slack_obj=IGF_slack(slack_config=slack_config)
-asana_obj=IGF_asana(asana_config=asana_config, asana_project_id=asana_project_id)
-
-try:
-  new_seqruns=find_new_seqrun_dir(seqrun_path, dbconfig_path)
-  new_seqruns,message=check_for_registered_project_and_sample(seqrun_info=new_seqruns,\
-                                                              dbconfig=dbconfig_path)
-  if message !='':
-    slack_obj.post_message_to_channel(message,reaction='pass')
-
-  if len(new_seqruns.keys()) > 0:
-    temp_dir=get_temp_dir() # create temp dir
-    new_seqruns,error_files=validate_samplesheet_for_seqrun(seqrun_info=new_seqruns,\
-                                                            schema_json=samplesheet_json_schema,\
-                                                            output_dir=temp_dir) # validate samplesheet for seqruns
-    if len(error_files.keys())>0:
-      for seqrun_name, error_file_path in error_files.items():
-        message='Samplesheet validation failed for run {0}'.format(seqrun_name)
-        slack_obj.post_file_to_channel(filepath=error_file_path,\
-                                       message=message) # post validation results to slack
-
-    remove_dir(temp_dir) # remove temp dir
-
-  if len(new_seqruns.keys()) > 0:
-    message='found {0} new sequence runs, calculating md5'.format(len(new_seqruns.keys()))
-    slack_obj.post_message_to_channel(message,reaction='pass')
-
-    new_seqrun_files_and_md5=calculate_file_md5(seqrun_info=new_seqruns, md5_out=md5_path, seqrun_path=seqrun_path, exclude_dir=exclude_path)
-    slack_obj.post_message_to_channel(message='finished md5 calculation, loading seqrun to db',reaction='pass')
-
-    load_seqrun_files_to_db(seqrun_info=new_seqruns, seqrun_md5_info=new_seqrun_files_and_md5, dbconfig=dbconfig_path)
-    seed_pipeline_table_for_new_seqrun(pipeline_name=pipeline_name, dbconfig=dbconfig_path)
-
-    for seqrun_name in new_seqruns.keys():
-      message='found new sequencing run {0}'.format(seqrun_name)
-      res=asana_obj.comment_asana_task(task_name=seqrun_name, comment=message)
-      slack_obj.post_message_to_channel(message,reaction='pass')
-      message='New asana task created for seqrun {0}, url: https://app.asana.com/0/{1}/{2}'.format(seqrun_name, asana_project_id, res['target']['id'])
-      slack_obj.post_message_to_channel(message,reaction='pass')
-  else:
-    slack_obj.post_message_to_channel(message='No new sequencing run found',reaction='sleep')
-except Exception as e:
-  message='Failed to load new seqruns, received following error: {0}'.format(e)
-  slack_obj.post_message_to_channel(message,reaction='fail')
-  raise
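
--
Reviewer note: each removed script was a thin argparse wrapper over a single
igf_data utility. For reference, a sketch of how one of them was invoked --
the flags come from the argparse definitions above, while the json file names
are illustrative, not from the repo:

  python scripts/db_scripts/load_platform_data.py \
    -p platform_data.json \
    -d dbconfig.json \
    -s slack_config.json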