diff --git a/ftpSummaryStatsScript/create_ftp_structure.py b/ftpSummaryStatsScript/create_ftp_structure.py index 5b34e8e..666905a 100644 --- a/ftpSummaryStatsScript/create_ftp_structure.py +++ b/ftpSummaryStatsScript/create_ftp_structure.py @@ -22,7 +22,7 @@ def get_gcst_range(gcst): - number_part = int(gcst.split("GCST")[1]) + number_part = int(gcst.split("GCST")[1]) - 1 floor = int(np.fix(number_part / range_size) * range_size) + 1 upper = floor + (range_size -1) range_str = "GCST{f}-GCST{u}".format(f=str(floor).zfill(6), u=str(upper).zfill(6)) diff --git a/ftpSummaryStatsScript/depo_ftp_to_staging.py b/ftpSummaryStatsScript/depo_ftp_to_staging.py index dd817cc..cfe4885 100644 --- a/ftpSummaryStatsScript/depo_ftp_to_staging.py +++ b/ftpSummaryStatsScript/depo_ftp_to_staging.py @@ -23,17 +23,20 @@ # in case of latency for writing the file. MOD_THRESHOLD_SEC = 3600 -#MOD_THRESHOLD_SEC = 36 # DEV ONLY +#MOD_THRESHOLD_SEC = 3 # DEV ONLY RANGE_SIZE = 1000 +RAW_SS_SUFFIX = ".rawSS" +RAW_SS_SUFFIX_REGEX = "\.rawSS$" def get_dirs_to_sync(source_dir): dirs = glob.glob(os.path.join(source_dir, 'GCST*')) + logger.debug(dirs) dirs_older_than_1hr = [d for d in dirs if (time.time() - os.path.getmtime(d)) > MOD_THRESHOLD_SEC] return dirs_older_than_1hr def get_gcst_range(gcst): - number_part = int(gcst.split("GCST")[1]) + number_part = int(gcst.split("GCST")[1]) - 1 floor = int(np.fix(number_part / RANGE_SIZE) * RANGE_SIZE) + 1 upper = floor + (RANGE_SIZE -1) range_str = "GCST{f}-GCST{u}".format(f=str(floor).zfill(6), u=str(upper).zfill(6)) @@ -47,6 +50,15 @@ def move_dir(source, dest): logger.debug("Moving {} --> {}".format(source, dest)) shutil.move(source, dest) +def rename_file(old, new): + logger.info("Renaming {} --> {}".format(old, new)) + os.rename(old, new) + +def remove_raw_ss_suffix(directory): + raw_ss_files = glob.glob(os.path.join(directory, '*' + RAW_SS_SUFFIX)) + for raw_ss in raw_ss_files: + without_raw_ss_suffix = re.sub(RAW_SS_SUFFIX_REGEX, '', raw_ss) + rename_file(raw_ss, without_raw_ss_suffix) def sync_files(source_dir, staging_dir, harmonise_dir): dirs_to_sync = get_dirs_to_sync(source_dir) @@ -60,10 +72,14 @@ def sync_files(source_dir, staging_dir, harmonise_dir): logger.debug(gcst) gcst_range = get_gcst_range(gcst) gcst_range_dir = os.path.join(staging_dir, gcst_range) - dest = gcst_range_dir + "/" + gcst_range_dir = gcst_range_dir + "/" make_dir(gcst_range_dir) - logger.info("Sync {} --> {}".format(study, dest)) - subprocess.call(['rsync', '-prvh','--chmod=Du=rwx,Dg=rwx,Do=rx,Fu=rw,Fg=rw,Fo=r', study, dest]) + logger.info("Sync {} --> {}".format(study, gcst_range_dir)) + # sync to staging + subprocess.call(['rsync', '-prvh','--chmod=Du=rwx,Dg=rwx,Do=rx,Fu=rw,Fg=rw,Fo=r', study, gcst_range_dir]) + dest = os.path.join(gcst_range_dir, gcst) + remove_raw_ss_suffix(dest) + # move to harmonisation move_dir(study, harmonise_dir + "/") diff --git a/harmonisationUtils/sumstats_file_utils.py b/harmonisationUtils/sumstats_file_utils.py index 7626d25..d45c1fa 100644 --- a/harmonisationUtils/sumstats_file_utils.py +++ b/harmonisationUtils/sumstats_file_utils.py @@ -35,7 +35,7 @@ def _list_study_dirs(parent, pattern): def get_gcst_range(gcst): - number_part = int(gcst.split("GCST")[1]) + number_part = int(gcst.split("GCST")[1]) - 1 floor = int(np.fix(number_part / range_size) * range_size) + 1 upper = floor + (range_size -1) range_str = "GCST{f}-GCST{u}".format(f=str(floor).zfill(6), u=str(upper).zfill(6))