From d7d58dda4afe105c50879fa179652c4b73d7aa98 Mon Sep 17 00:00:00 2001 From: b97pla Date: Wed, 11 Oct 2023 17:56:34 +0200 Subject: [PATCH] fix tar commands --- archive_upload/handlers/dsmc_handlers.py | 113 ++++++++++++++--------- 1 file changed, 69 insertions(+), 44 deletions(-) diff --git a/archive_upload/handlers/dsmc_handlers.py b/archive_upload/handlers/dsmc_handlers.py index a1a2725..0cdace9 100644 --- a/archive_upload/handlers/dsmc_handlers.py +++ b/archive_upload/handlers/dsmc_handlers.py @@ -39,7 +39,7 @@ def initialize(self, config, runner_service): :param config: configuration used by the service :param runner_service: runner service to use. Must fulfill `archive_upload.lib.jobrunner.JobRunnerAdapter` interface """ - self.config = config + self.config = config.get_app_config() self.runner_service = runner_service @staticmethod @@ -402,7 +402,7 @@ def post(self, runfolder_archive): path_to_archive = os.path.join(monitored_dir, runfolder_archive) dsmc_log_root_dir = self.config["log_directory"] - dsmc_extra_args = self.config.get("dsmc_extra_args", "") + dsmc_extra_args = self.config.get("dsmc_extra_args", {}) if not self._is_valid_log_dir(dsmc_log_root_dir): msg = "Error when validating log dir. {} is not a directory.".format(dsmc_log_root_dir) @@ -491,7 +491,7 @@ def post(self, runfolder_archive): path_to_archive = os.path.join(monitored_dir, runfolder_archive) dsmc_log_root_dir = self.config["log_directory"] - dsmc_extra_args = self.config.get("dsmc_extra_args", "") + dsmc_extra_args = self.config.get("dsmc_extra_args", {}) uniq_id = str(uuid.uuid4()) if not self._is_valid_log_dir(dsmc_log_root_dir): @@ -781,6 +781,58 @@ class CompressArchiveHandler(BaseDsmcHandler): Handler for compressing certain files in the archive before uploading. """ + @staticmethod + def _create_tarball_cmd(tarball_name, path_to_archive, exclude_from_tarball): + exclude_patterns = " ".join( + [ + "--exclude={}".format(p) + for p in exclude_from_tarball + [tarball_name] + ] + ) + return "cd {} && " \ + "touch {} && " \ + "tar " \ + "--create " \ + "--gzip " \ + "--dereference " \ + "--hard-dereference " \ + "--file={} " \ + "{} " \ + ".".format( + path_to_archive, + tarball_name, + tarball_name, + exclude_patterns + ) + + @staticmethod + def _remove_tarballed_files_cmd(path_to_archive, tarball_name): + return "cd {} && " \ + "tar " \ + "--list " \ + "--file={} |" \ + "xargs " \ + "-n1 " \ + "rm -f".format( + path_to_archive, + tarball_name + ) + + @staticmethod + def _remove_empty_dirs_cmd(path_to_archive): + return "cd {} && " \ + "find " \ + ". " \ + "-depth " \ + "-mindepth 1 " \ + "-type d |" \ + "xargs " \ + "-n1 " \ + "rmdir " \ + "--ignore-fail-on-non-empty".format( + path_to_archive + ) + def post(self, archive): """ Create a gziped tarball of most files in the archive, with the exception of @@ -799,7 +851,8 @@ def post(self, archive): archive, path_to_archive_root) raise ArchiveException(reason=msg, status_code=400) - tarball_path = "{}.tar.gz".format(os.path.join(path_to_archive, archive)) + tarball_name = "{}.tar.gz".format(archive) + tarball_path = os.path.join(path_to_archive, tarball_name) log.debug("Checking to see if {} exists".format(tarball_path)) @@ -807,48 +860,20 @@ def post(self, archive): msg = "Error when creating archive tarball. {} already exists.".format(tarball_path) raise ArchiveException(reason=msg, status_code=400) - exclude_from_tarball = self.config.get("exclude_from_tarball", []) - exclude_patterns = " ".join( - [ - "--exclude={}".format(p) - for p in exclude_from_tarball - ] + exclude_from_tarball = self.config["exclude_from_tarball"] + cmd = " ( {} ) && ( {} ) ; ( {} )".format( + self._create_tarball_cmd( + tarball_name, + path_to_archive, + exclude_from_tarball), + self._remove_tarballed_files_cmd( + path_to_archive, + tarball_name), + self._remove_empty_dirs_cmd( + path_to_archive) ) - cmd = "cd {} && " \ - "tar " \ - "--create " \ - "--gzip " \ - "--dereference " \ - "--hard-dereference " \ - "--verify " \ - "--file={} " \ - "{} " \ - ".".format( - path_to_archive, - tarball_path, - exclude_patterns - ) - cmd = "{} && " \ - "tar " \ - "--list " \ - "--file={} |" \ - "xargs " \ - "-n1 " \ - "rm".format( - cmd, - tarball_path - ) - cmd = "{} && " \ - "find . " \ - "-depth " \ - "-type d |" \ - "xargs " \ - "-n1 " \ - "rmdir".format( - cmd, - path_to_archive - ) + log.info("run command: {}".format(cmd)) log.info( "Creating tarball {}, then removing files from {} that were added to tarball".format( tarball_path,