From dc9366b7ec11415488a07e81083f56eb209c0bc7 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 15 Nov 2024 00:56:17 -0500 Subject: [PATCH 01/20] Initial draft --- .../clp_package_utils/general.py | 1 + .../clp_package_utils/scripts/del_archives.py | 107 +++++++++++++ .../scripts/native/del_archives.py | 150 ++++++++++++++++++ .../package-template/src/sbin/del-archives.sh | 9 ++ 4 files changed, 267 insertions(+) create mode 100644 components/clp-package-utils/clp_package_utils/scripts/del_archives.py create mode 100644 components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py create mode 100755 components/package-template/src/sbin/del-archives.sh diff --git a/components/clp-package-utils/clp_package_utils/general.py b/components/clp-package-utils/clp_package_utils/general.py index 4dca481b0..1e94991ce 100644 --- a/components/clp-package-utils/clp_package_utils/general.py +++ b/components/clp-package-utils/clp_package_utils/general.py @@ -51,6 +51,7 @@ class JobType(KebabCaseStrEnum): FILE_EXTRACTION = auto() IR_EXTRACTION = auto() SEARCH = auto() + DEL_ARCHIVE = auto() class DockerMount: diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py new file mode 100644 index 000000000..4a8b6cfbf --- /dev/null +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -0,0 +1,107 @@ +import argparse +import logging +import pathlib +import subprocess +import sys + +from clp_package_utils.general import ( + CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, + dump_container_config, + generate_container_config, + generate_container_name, + generate_container_start_cmd, + get_clp_home, + JobType, + load_config_file, + validate_and_load_db_credentials_file, +) + +# Setup logging +# Create logger +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +def main(argv): + clp_home = get_clp_home() + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Prune the out-dated archives.") + args_parser.add_argument( + "--config", + "-c", + default=str(default_config_file_path), + help="CLP package configuration file.", + ) + args_parser.add_argument( + "--begin-ts", + type=int, + default=0, + help="Time range filter lower-bound (inclusive) as milliseconds" " from the UNIX epoch.", + ) + args_parser.add_argument( + "--end-ts", + type=int, + required=True, + help="Time range filter upper-bound (inclusive) as milliseconds" " from the UNIX epoch.", + ) + parsed_args = args_parser.parse_args(argv[1:]) + + # Validate and load config file + try: + config_file_path = pathlib.Path(parsed_args.config) + clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) + clp_config.validate_logs_dir() + + # Validate and load necessary credentials + validate_and_load_db_credentials_file(clp_config, clp_home, False) + except: + logger.exception("Failed to load config.") + return -1 + + # Validate the input timestamp + begin_ts = parsed_args.begin_ts + end_ts = parsed_args.end_ts + if end_ts < 0 or begin_ts < 0: + logger.error("begin_ts and end_ts must be positive.") + return -1 + + container_name = generate_container_name(JobType.DEL_ARCHIVE) + + container_clp_config, mounts = generate_container_config(clp_config, clp_home) + generated_config_path_on_container, generated_config_path_on_host = dump_container_config( + container_clp_config, clp_config, container_name + ) + + necessary_mounts = [mounts.clp_home, mounts.logs_dir, mounts.archives_output_dir] + container_start_cmd = generate_container_start_cmd( + container_name, necessary_mounts, clp_config.execution_container + ) + + # fmt: off + del_archive_cmd = [ + "python3", + "-m", "clp_package_utils.scripts.native.del_archives", + "--config", str(generated_config_path_on_container), + str(begin_ts), + str(end_ts) + + ] + # fmt: on + + cmd = container_start_cmd + del_archive_cmd + subprocess.run(cmd, check=True) + + # Remove generated files + generated_config_path_on_host.unlink() + + return 0 + + +if "__main__" == __name__: + sys.exit(main(sys.argv)) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py new file mode 100644 index 000000000..a312a1679 --- /dev/null +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -0,0 +1,150 @@ +import argparse +import logging +import pathlib +import shutil +import sys +from contextlib import closing +from typing import List, Optional + +from clp_py_utils.clp_config import CLPConfig +from clp_py_utils.sql_adapter import SQL_Adapter + +from clp_package_utils.general import ( + CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, + get_clp_home, + load_config_file, +) + +# Setup logging +# Create logger +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +# Consider deduplicate this +def validate_and_load_config_file( + clp_home: pathlib.Path, + config_file_path: pathlib.Path, + default_config_file_path: pathlib.Path, +) -> Optional[CLPConfig]: + """ + Validates and loads the config file. + :param clp_home: + :param config_file_path: + :param default_config_file_path: + :return: clp_config on success, None otherwise. + """ + try: + clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) + clp_config.validate_archive_output_dir() + clp_config.validate_logs_dir() + return clp_config + except Exception: + logger.exception("Failed to load config.") + return None + + +def handle_file_deletion( + clp_home: pathlib.Path, + config_file_path: pathlib.Path, + default_config_file_path: pathlib.Path, + begin_ts: int, + end_ts: int, +) -> int: + # Validate and load config file + clp_config = validate_and_load_config_file(clp_home, config_file_path, default_config_file_path) + if clp_config is None: + return -1 + + archives_dir = clp_config.archive_output.directory + database_config = clp_config.database + archives_to_delete: List[str] + + logger.info(f"Start deleting archives from database") + try: + sql_adapter = SQL_Adapter(database_config) + clp_db_connection_params = database_config.get_clp_connection_params_and_type(True) + table_prefix = clp_db_connection_params["table_prefix"] + with closing(sql_adapter.create_connection(True)) as db_conn, closing( + db_conn.cursor(dictionary=True) + ) as db_cursor: + db_cursor.execute( + f""" + DELETE FROM `{table_prefix}archives` WHERE + begin_timestamp >= {begin_ts} AND end_timestamp <= {end_ts} + RETURNING id + """ + ) + results = db_cursor.fetchall() + + if 0 == len(results): + logger.info("No archive meets the deletion condition.") + return 0 + + archives_to_delete = [result["id"] for result in results] + db_cursor.execute( + f""" + DELETE FROM `{table_prefix}files` + WHERE archive_id in ({', '.join(['%s'] * len(archives_to_delete))}) + """, + archives_to_delete, + ) + db_conn.commit() + except Exception: + logger.exception("Failed to delete archives from database, abort.") + return -1 + + logger.info(f"Finished deleting archives from the database") + + for archive in archives_to_delete: + logger.info(f"Deleting archive {archive} from the storage") + archive_path = archives_dir / archive + if not archive_path.is_dir(): + logger.warning(f"Archive {archive} is not a directory, skipping deletion") + continue + shutil.rmtree(archive_path) + + logger.info(f"Finished deleting archives") + return 0 + + +def main(argv): + clp_home = get_clp_home() + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Delete out-of-dated archive.") + args_parser.add_argument( + "--config", + "-c", + required=True, + default=str(default_config_file_path), + help="CLP configuration file.", + ) + args_parser.add_argument( + "begin_ts", + type=int, + help="Time range filter lower-bound (inclusive) as milliseconds" " from the UNIX epoch.", + ) + args_parser.add_argument( + "end_ts", + type=int, + help="Time range filter lower-bound (inclusive) as milliseconds" " from the UNIX epoch.", + ) + parsed_args = args_parser.parse_args(argv[1:]) + + return handle_file_deletion( + clp_home, + pathlib.Path(parsed_args.config), + default_config_file_path, + parsed_args.begin_ts, + parsed_args.end_ts, + ) + + +if "__main__" == __name__: + sys.exit(main(sys.argv)) diff --git a/components/package-template/src/sbin/del-archives.sh b/components/package-template/src/sbin/del-archives.sh new file mode 100755 index 000000000..2cd4bef83 --- /dev/null +++ b/components/package-template/src/sbin/del-archives.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +package_root="$script_dir/.." + +PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ + python3 \ + -m clp_package_utils.scripts.del_archives \ + "$@" From 28002447bbe4a962b3ec230e400cc23f41ec4697 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:12:33 -0500 Subject: [PATCH 02/20] Refactor --- .../clp_package_utils/scripts/del_archives.py | 3 + .../scripts/native/del_archives.py | 65 +++++++++---------- .../package-template/src/etc/clp-config.yml | 16 ++--- 3 files changed, 41 insertions(+), 43 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index 4a8b6cfbf..f3dfcaf4d 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -67,6 +67,9 @@ def main(argv): # Validate the input timestamp begin_ts = parsed_args.begin_ts end_ts = parsed_args.end_ts + if begin_ts > end_ts: + logger.error("begin-ts must be less than or equal to end-ts") + return -1 if end_ts < 0 or begin_ts < 0: logger.error("begin_ts and end_ts must be positive.") return -1 diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index a312a1679..5ffe128cc 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -4,9 +4,8 @@ import shutil import sys from contextlib import closing -from typing import List, Optional +from typing import List -from clp_py_utils.clp_config import CLPConfig from clp_py_utils.sql_adapter import SQL_Adapter from clp_package_utils.general import ( @@ -26,52 +25,48 @@ logger.addHandler(logging_console_handler) -# Consider deduplicate this -def validate_and_load_config_file( +def handle_file_deletion( clp_home: pathlib.Path, config_file_path: pathlib.Path, default_config_file_path: pathlib.Path, -) -> Optional[CLPConfig]: + begin_ts: int, + end_ts: int, +) -> int: """ - Validates and loads the config file. + Deletes all archives with `begin_timestamp` and `end_timestamp` within the specified range from + the database, and removes any files associated with these archives. :param clp_home: :param config_file_path: :param default_config_file_path: - :return: clp_config on success, None otherwise. + :param begin_ts: + :param end_ts: + :return: 0 on success, -1 otherwise. """ + + # Validate and load config file try: clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) - clp_config.validate_archive_output_dir() clp_config.validate_logs_dir() - return clp_config - except Exception: + except: logger.exception("Failed to load config.") - return None - - -def handle_file_deletion( - clp_home: pathlib.Path, - config_file_path: pathlib.Path, - default_config_file_path: pathlib.Path, - begin_ts: int, - end_ts: int, -) -> int: - # Validate and load config file - clp_config = validate_and_load_config_file(clp_home, config_file_path, default_config_file_path) - if clp_config is None: return -1 - archives_dir = clp_config.archive_output.directory database_config = clp_config.database - archives_to_delete: List[str] + archives_dir = clp_config.archive_output.directory + # Note, the error message doesn't print value of archives_dir because it is mounted path. + # It could be confusing for user since the path anyway will not exist in their file system. + if not archives_dir.exists(): + logger.error(f"archive directory doesn't exist. abort deletion") + return -1 + archive_ids: List[str] logger.info(f"Start deleting archives from database") try: sql_adapter = SQL_Adapter(database_config) clp_db_connection_params = database_config.get_clp_connection_params_and_type(True) table_prefix = clp_db_connection_params["table_prefix"] with closing(sql_adapter.create_connection(True)) as db_conn, closing( - db_conn.cursor(dictionary=True) + db_conn.cursor(dictionary=True) ) as db_cursor: db_cursor.execute( f""" @@ -83,29 +78,29 @@ def handle_file_deletion( results = db_cursor.fetchall() if 0 == len(results): - logger.info("No archive meets the deletion condition.") + logger.warning("No archive meets the deletion condition.") return 0 - archives_to_delete = [result["id"] for result in results] + archive_ids = [result["id"] for result in results] db_cursor.execute( f""" DELETE FROM `{table_prefix}files` - WHERE archive_id in ({', '.join(['%s'] * len(archives_to_delete))}) + WHERE archive_id in ({', '.join(['%s'] * len(archive_ids))}) """, - archives_to_delete, + archive_ids, ) db_conn.commit() except Exception: - logger.exception("Failed to delete archives from database, abort.") + logger.exception("Failed to delete archives from database, abort deletion") return -1 logger.info(f"Finished deleting archives from the database") - for archive in archives_to_delete: - logger.info(f"Deleting archive {archive} from the storage") - archive_path = archives_dir / archive + for archive_id in archive_ids: + logger.info(f"Deleting archive {archive_id} from the storage") + archive_path = archives_dir / archive_id if not archive_path.is_dir(): - logger.warning(f"Archive {archive} is not a directory, skipping deletion") + logger.warning(f"Archive {archive_id} is not a directory, skipping deletion") continue shutil.rmtree(archive_path) diff --git a/components/package-template/src/etc/clp-config.yml b/components/package-template/src/etc/clp-config.yml index cb66f40cd..647ee05bc 100644 --- a/components/package-template/src/etc/clp-config.yml +++ b/components/package-template/src/etc/clp-config.yml @@ -7,9 +7,9 @@ ## File containing credentials for services #credentials_file_path: "etc/credentials.yml" # -#package: -# storage_engine: "clp" -# +package: + storage_engine: "clp" + #database: # type: "mariadb" # "mariadb" or "mysql" # host: "localhost" @@ -55,11 +55,11 @@ #query_worker: # logging_level: "INFO" # -#webui: -# host: "localhost" -# port: 4000 -# logging_level: "INFO" -# +webui: + host: "localhost" + port: 5000 + logging_level: "INFO" + #log_viewer_webui: # host: "localhost" # port: 3000 From def142e3ecc95f5bdadd0ba536a159ca38769f77 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Sat, 16 Nov 2024 11:28:00 -0500 Subject: [PATCH 03/20] Linter --- .../clp_package_utils/scripts/native/del_archives.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 5ffe128cc..a1cbe5392 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -66,7 +66,7 @@ def handle_file_deletion( clp_db_connection_params = database_config.get_clp_connection_params_and_type(True) table_prefix = clp_db_connection_params["table_prefix"] with closing(sql_adapter.create_connection(True)) as db_conn, closing( - db_conn.cursor(dictionary=True) + db_conn.cursor(dictionary=True) ) as db_cursor: db_cursor.execute( f""" From d1e1f01dfffd49c099411818f545d9d00241701b Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 18 Nov 2024 11:47:52 -0500 Subject: [PATCH 04/20] Remove unintended change --- .../package-template/src/etc/clp-config.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/components/package-template/src/etc/clp-config.yml b/components/package-template/src/etc/clp-config.yml index 647ee05bc..cb66f40cd 100644 --- a/components/package-template/src/etc/clp-config.yml +++ b/components/package-template/src/etc/clp-config.yml @@ -7,9 +7,9 @@ ## File containing credentials for services #credentials_file_path: "etc/credentials.yml" # -package: - storage_engine: "clp" - +#package: +# storage_engine: "clp" +# #database: # type: "mariadb" # "mariadb" or "mysql" # host: "localhost" @@ -55,11 +55,11 @@ package: #query_worker: # logging_level: "INFO" # -webui: - host: "localhost" - port: 5000 - logging_level: "INFO" - +#webui: +# host: "localhost" +# port: 4000 +# logging_level: "INFO" +# #log_viewer_webui: # host: "localhost" # port: 3000 From 67182ba23c77e524ba8ece77f595d3427f1ebbcc Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 18 Nov 2024 20:31:17 -0500 Subject: [PATCH 05/20] Some retouching --- .../clp_package_utils/scripts/native/del_archives.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index a1cbe5392..02c1c9d3b 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -53,8 +53,8 @@ def handle_file_deletion( database_config = clp_config.database archives_dir = clp_config.archive_output.directory - # Note, the error message doesn't print value of archives_dir because it is mounted path. - # It could be confusing for user since the path anyway will not exist in their file system. + # Note, the error message doesn't output the value of archives_dir because it is a mounted + # path. It could be confusing for user because the path will not exist in their file system. if not archives_dir.exists(): logger.error(f"archive directory doesn't exist. abort deletion") return -1 @@ -78,7 +78,7 @@ def handle_file_deletion( results = db_cursor.fetchall() if 0 == len(results): - logger.warning("No archive meets the deletion condition.") + logger.warning("No archive falls into the specified time range, abort deletion") return 0 archive_ids = [result["id"] for result in results] @@ -100,7 +100,7 @@ def handle_file_deletion( logger.info(f"Deleting archive {archive_id} from the storage") archive_path = archives_dir / archive_id if not archive_path.is_dir(): - logger.warning(f"Archive {archive_id} is not a directory, skipping deletion") + logger.warning(f"Archive {archive_id} does not resolve to a directory, skip deletion") continue shutil.rmtree(archive_path) @@ -112,7 +112,9 @@ def main(argv): clp_home = get_clp_home() default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH - args_parser = argparse.ArgumentParser(description="Delete out-of-dated archive.") + args_parser = argparse.ArgumentParser( + description="Delete archives that fall into the specified time range." + ) args_parser.add_argument( "--config", "-c", From 9e8fa85b72782f760b2e12b92d735d5f4f2f6477 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:32:35 -0500 Subject: [PATCH 06/20] Apply suggestions from code review Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- .../scripts/native/del_archives.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 02c1c9d3b..c7a55a860 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -91,20 +91,22 @@ def handle_file_deletion( ) db_conn.commit() except Exception: - logger.exception("Failed to delete archives from database, abort deletion") + logger.exception("Failed to delete archives from the database. Aborting deletion.") return -1 - logger.info(f"Finished deleting archives from the database") + logger.info(f"Finished deleting archives from the database.") for archive_id in archive_ids: - logger.info(f"Deleting archive {archive_id} from the storage") + logger.info(f"Deleting archive {archive_id} from disk.") archive_path = archives_dir / archive_id if not archive_path.is_dir(): - logger.warning(f"Archive {archive_id} does not resolve to a directory, skip deletion") + logger.warning(f"Archive {archive_id} is not a directory. Skipping deletion.") continue + shutil.rmtree(archive_path) - logger.info(f"Finished deleting archives") + logger.info(f"Finished deleting archives from disk.") + return 0 @@ -113,7 +115,7 @@ def main(argv): default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH args_parser = argparse.ArgumentParser( - description="Delete archives that fall into the specified time range." + description="Deletes archives that fall within the specified time range." ) args_parser.add_argument( "--config", @@ -125,12 +127,12 @@ def main(argv): args_parser.add_argument( "begin_ts", type=int, - help="Time range filter lower-bound (inclusive) as milliseconds" " from the UNIX epoch.", + help="Time-range lower-bound (inclusive) as milliseconds from the UNIX epoch.", ) args_parser.add_argument( "end_ts", type=int, - help="Time range filter lower-bound (inclusive) as milliseconds" " from the UNIX epoch.", + help="Time-range upper-bound (include) as milliseconds from the UNIX epoch.", ) parsed_args = args_parser.parse_args(argv[1:]) From 2e769166be6aeebdc7724e552cb7f2e3104a2f8a Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:33:44 -0500 Subject: [PATCH 07/20] Apply suggestions from code review Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- .../clp_package_utils/scripts/del_archives.py | 4 ++-- .../clp_package_utils/scripts/native/del_archives.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index f3dfcaf4d..5a970baf9 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -68,10 +68,10 @@ def main(argv): begin_ts = parsed_args.begin_ts end_ts = parsed_args.end_ts if begin_ts > end_ts: - logger.error("begin-ts must be less than or equal to end-ts") + logger.error("begin-ts must be <= end-ts") return -1 if end_ts < 0 or begin_ts < 0: - logger.error("begin_ts and end_ts must be positive.") + logger.error("begin_ts and end_ts must be non-negative.") return -1 container_name = generate_container_name(JobType.DEL_ARCHIVE) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index c7a55a860..6f401e7be 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -33,8 +33,8 @@ def handle_file_deletion( end_ts: int, ) -> int: """ - Deletes all archives with `begin_timestamp` and `end_timestamp` within the specified range from - the database, and removes any files associated with these archives. + Deletes all archives where `begin_ts <= archive.begin_timestamp` and + `archive.end_timestamp <= end_ts` from both the metadata database and disk. :param clp_home: :param config_file_path: :param default_config_file_path: @@ -56,11 +56,11 @@ def handle_file_deletion( # Note, the error message doesn't output the value of archives_dir because it is a mounted # path. It could be confusing for user because the path will not exist in their file system. if not archives_dir.exists(): - logger.error(f"archive directory doesn't exist. abort deletion") + logger.error("`archive_output.directory` doesn't exist.") return -1 archive_ids: List[str] - logger.info(f"Start deleting archives from database") + logger.info("Starting to delete archives from the database.") try: sql_adapter = SQL_Adapter(database_config) clp_db_connection_params = database_config.get_clp_connection_params_and_type(True) @@ -78,7 +78,7 @@ def handle_file_deletion( results = db_cursor.fetchall() if 0 == len(results): - logger.warning("No archive falls into the specified time range, abort deletion") + logger.warning("No archives (exclusively) within the specified time range.") return 0 archive_ids = [result["id"] for result in results] From 478d8ca56a139d16382f4c9fe596fdd6a8ee8e0a Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:42:55 -0500 Subject: [PATCH 08/20] Apply suggestions from code review Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- .../clp_package_utils/scripts/native/del_archives.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 6f401e7be..4cc24015a 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -53,8 +53,6 @@ def handle_file_deletion( database_config = clp_config.database archives_dir = clp_config.archive_output.directory - # Note, the error message doesn't output the value of archives_dir because it is a mounted - # path. It could be confusing for user because the path will not exist in their file system. if not archives_dir.exists(): logger.error("`archive_output.directory` doesn't exist.") return -1 From b0817106a8c81949f6d10f722cb39c14fd13bc85 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:47:17 -0500 Subject: [PATCH 09/20] address code review comments --- .../clp_package_utils/scripts/del_archives.py | 6 +- .../scripts/native/del_archives.py | 104 +++++++++--------- .../sbin/{ => admin_tools}/del-archives.sh | 0 3 files changed, 54 insertions(+), 56 deletions(-) rename components/package-template/src/sbin/{ => admin_tools}/del-archives.sh (100%) diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index 5a970baf9..aab1d8d8b 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -31,7 +31,7 @@ def main(argv): clp_home = get_clp_home() default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH - args_parser = argparse.ArgumentParser(description="Prune the out-dated archives.") + args_parser = argparse.ArgumentParser(description="Deletes archives that fall within the specified time range.") args_parser.add_argument( "--config", "-c", @@ -42,13 +42,13 @@ def main(argv): "--begin-ts", type=int, default=0, - help="Time range filter lower-bound (inclusive) as milliseconds" " from the UNIX epoch.", + help="Time-range lower-bound (inclusive) as milliseconds from the UNIX epoch.", ) args_parser.add_argument( "--end-ts", type=int, required=True, - help="Time range filter upper-bound (inclusive) as milliseconds" " from the UNIX epoch.", + help="Time-range upper-bound (include) as milliseconds from the UNIX epoch.", ) parsed_args = args_parser.parse_args(argv[1:]) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 4cc24015a..e2b9b98fb 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -6,6 +6,7 @@ from contextlib import closing from typing import List +from clp_py_utils.clp_config import Database from clp_py_utils.sql_adapter import SQL_Adapter from clp_package_utils.general import ( @@ -25,27 +26,35 @@ logger.addHandler(logging_console_handler) -def handle_file_deletion( - clp_home: pathlib.Path, - config_file_path: pathlib.Path, - default_config_file_path: pathlib.Path, - begin_ts: int, - end_ts: int, -) -> int: - """ - Deletes all archives where `begin_ts <= archive.begin_timestamp` and - `archive.end_timestamp <= end_ts` from both the metadata database and disk. - :param clp_home: - :param config_file_path: - :param default_config_file_path: - :param begin_ts: - :param end_ts: - :return: 0 on success, -1 otherwise. - """ +def main(argv): + clp_home = get_clp_home() + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser( + description="Deletes archives that fall within the specified time range." + ) + args_parser.add_argument( + "--config", + "-c", + required=True, + default=str(default_config_file_path), + help="CLP configuration file.", + ) + args_parser.add_argument( + "begin_ts", + type=int, + help="Time-range lower-bound (inclusive) as milliseconds from the UNIX epoch.", + ) + args_parser.add_argument( + "end_ts", + type=int, + help="Time-range upper-bound (include) as milliseconds from the UNIX epoch.", + ) + parsed_args = args_parser.parse_args(argv[1:]) # Validate and load config file try: - clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) + clp_config = load_config_file(parsed_args.config, default_config_file_path, clp_home) clp_config.validate_logs_dir() except: logger.exception("Failed to load config.") @@ -57,6 +66,30 @@ def handle_file_deletion( logger.error("`archive_output.directory` doesn't exist.") return -1 + return _delete_archives( + archives_dir, + database_config, + parsed_args.begin_ts, + parsed_args.end_ts, + ) + + +def _delete_archives( + archives_dir: pathlib.Path, + database_config: Database, + begin_ts: int, + end_ts: int, +) -> int: + """ + Deletes all archives where `begin_ts <= archive.begin_timestamp` and + `archive.end_timestamp <= end_ts` from both the metadata database and disk. + :param archives_dir: + :param database_config: + :param begin_ts: + :param end_ts: + :return: 0 on success, -1 otherwise. + """ + archive_ids: List[str] logger.info("Starting to delete archives from the database.") try: @@ -108,40 +141,5 @@ def handle_file_deletion( return 0 -def main(argv): - clp_home = get_clp_home() - default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH - - args_parser = argparse.ArgumentParser( - description="Deletes archives that fall within the specified time range." - ) - args_parser.add_argument( - "--config", - "-c", - required=True, - default=str(default_config_file_path), - help="CLP configuration file.", - ) - args_parser.add_argument( - "begin_ts", - type=int, - help="Time-range lower-bound (inclusive) as milliseconds from the UNIX epoch.", - ) - args_parser.add_argument( - "end_ts", - type=int, - help="Time-range upper-bound (include) as milliseconds from the UNIX epoch.", - ) - parsed_args = args_parser.parse_args(argv[1:]) - - return handle_file_deletion( - clp_home, - pathlib.Path(parsed_args.config), - default_config_file_path, - parsed_args.begin_ts, - parsed_args.end_ts, - ) - - if "__main__" == __name__: sys.exit(main(sys.argv)) diff --git a/components/package-template/src/sbin/del-archives.sh b/components/package-template/src/sbin/admin_tools/del-archives.sh similarity index 100% rename from components/package-template/src/sbin/del-archives.sh rename to components/package-template/src/sbin/admin_tools/del-archives.sh From a16e8bcf8e5f3805f49337798357a5dd08f17090 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:24:32 -0500 Subject: [PATCH 10/20] address more code review comments --- .../clp_package_utils/scripts/native/del_archives.py | 7 ++++--- .../package-template/src/sbin/admin_tools/del-archives.sh | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index e2b9b98fb..7a3d5ab41 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -53,8 +53,9 @@ def main(argv): parsed_args = args_parser.parse_args(argv[1:]) # Validate and load config file + config_file_path = pathlib.Path(parsed_args.config) try: - clp_config = load_config_file(parsed_args.config, default_config_file_path, clp_home) + clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) clp_config.validate_logs_dir() except: logger.exception("Failed to load config.") @@ -109,7 +110,7 @@ def _delete_archives( results = db_cursor.fetchall() if 0 == len(results): - logger.warning("No archives (exclusively) within the specified time range.") + logger.info("No archives (exclusively) within the specified time range.") return 0 archive_ids = [result["id"] for result in results] @@ -128,12 +129,12 @@ def _delete_archives( logger.info(f"Finished deleting archives from the database.") for archive_id in archive_ids: - logger.info(f"Deleting archive {archive_id} from disk.") archive_path = archives_dir / archive_id if not archive_path.is_dir(): logger.warning(f"Archive {archive_id} is not a directory. Skipping deletion.") continue + logger.info(f"Deleting archive {archive_id} from disk.") shutil.rmtree(archive_path) logger.info(f"Finished deleting archives from disk.") diff --git a/components/package-template/src/sbin/admin_tools/del-archives.sh b/components/package-template/src/sbin/admin_tools/del-archives.sh index 2cd4bef83..4d7ebc6b7 100755 --- a/components/package-template/src/sbin/admin_tools/del-archives.sh +++ b/components/package-template/src/sbin/admin_tools/del-archives.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -package_root="$script_dir/.." +package_root="$script_dir/../.." PYTHONPATH=$(readlink -f "$package_root/lib/python3/site-packages") \ python3 \ From 816892a94bae011a964be553e5ba4135753f1862 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:27:42 -0500 Subject: [PATCH 11/20] Linter --- .../clp_package_utils/scripts/del_archives.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index aab1d8d8b..841d7c8c8 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -31,7 +31,9 @@ def main(argv): clp_home = get_clp_home() default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH - args_parser = argparse.ArgumentParser(description="Deletes archives that fall within the specified time range.") + args_parser = argparse.ArgumentParser( + description="Deletes archives that fall within the specified time range." + ) args_parser.add_argument( "--config", "-c", From 4e73e4d25e861a256cf0612a5a936e78c3c3435c Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 25 Nov 2024 21:26:43 -0500 Subject: [PATCH 12/20] Address code review comments --- components/clp-package-utils/clp_package_utils/general.py | 2 +- .../clp-package-utils/clp_package_utils/scripts/compress.py | 2 +- .../clp_package_utils/scripts/decompress.py | 2 +- .../clp_package_utils/scripts/del_archives.py | 6 +++--- .../clp_package_utils/scripts/native/del_archives.py | 6 +++--- .../clp-package-utils/clp_package_utils/scripts/search.py | 2 +- .../src/sbin/{admin_tools => admin-tools}/del-archives.sh | 0 7 files changed, 10 insertions(+), 10 deletions(-) rename components/package-template/src/sbin/{admin_tools => admin-tools}/del-archives.sh (100%) diff --git a/components/clp-package-utils/clp_package_utils/general.py b/components/clp-package-utils/clp_package_utils/general.py index 1e94991ce..32600a452 100644 --- a/components/clp-package-utils/clp_package_utils/general.py +++ b/components/clp-package-utils/clp_package_utils/general.py @@ -107,7 +107,7 @@ def get_clp_home(): return clp_home.resolve() -def generate_container_name(job_type: JobType) -> str: +def generate_container_name(job_type: str) -> str: """ :param job_type: :return: A unique container name for the given job type. diff --git a/components/clp-package-utils/clp_package_utils/scripts/compress.py b/components/clp-package-utils/clp_package_utils/scripts/compress.py index d0aa30913..fc6af4853 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/compress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/compress.py @@ -66,7 +66,7 @@ def main(argv): logger.exception("Failed to load config.") return -1 - container_name = generate_container_name(JobType.COMPRESSION) + container_name = generate_container_name(str(JobType.COMPRESSION)) container_clp_config, mounts = generate_container_config(clp_config, clp_home) generated_config_path_on_container, generated_config_path_on_host = dump_container_config( diff --git a/components/clp-package-utils/clp_package_utils/scripts/decompress.py b/components/clp-package-utils/clp_package_utils/scripts/decompress.py index 1a2973fec..29f011379 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/decompress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/decompress.py @@ -88,7 +88,7 @@ def handle_extract_file_cmd( if clp_config is None: return -1 - container_name = generate_container_name(JobType.FILE_EXTRACTION) + container_name = generate_container_name(str(JobType.FILE_EXTRACTION)) container_clp_config, mounts = generate_container_config(clp_config, clp_home) generated_config_path_on_container, generated_config_path_on_host = dump_container_config( container_clp_config, clp_config, container_name diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index 841d7c8c8..99bf75568 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -1,6 +1,6 @@ import argparse import logging -import pathlib +from pathlib import Path import subprocess import sys @@ -56,7 +56,7 @@ def main(argv): # Validate and load config file try: - config_file_path = pathlib.Path(parsed_args.config) + config_file_path = Path(parsed_args.config) clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) clp_config.validate_logs_dir() @@ -76,7 +76,7 @@ def main(argv): logger.error("begin_ts and end_ts must be non-negative.") return -1 - container_name = generate_container_name(JobType.DEL_ARCHIVE) + container_name = generate_container_name(str(JobType.DEL_ARCHIVE)) container_clp_config, mounts = generate_container_config(clp_config, clp_home) generated_config_path_on_container, generated_config_path_on_host = dump_container_config( diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 7a3d5ab41..281fe34fd 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -1,6 +1,6 @@ import argparse import logging -import pathlib +from pathlib import Path import shutil import sys from contextlib import closing @@ -53,7 +53,7 @@ def main(argv): parsed_args = args_parser.parse_args(argv[1:]) # Validate and load config file - config_file_path = pathlib.Path(parsed_args.config) + config_file_path = Path(parsed_args.config) try: clp_config = load_config_file(config_file_path, default_config_file_path, clp_home) clp_config.validate_logs_dir() @@ -76,7 +76,7 @@ def main(argv): def _delete_archives( - archives_dir: pathlib.Path, + archives_dir: Path, database_config: Database, begin_ts: int, end_ts: int, diff --git a/components/clp-package-utils/clp_package_utils/scripts/search.py b/components/clp-package-utils/clp_package_utils/scripts/search.py index f3f02046d..67c561bc3 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/search.py +++ b/components/clp-package-utils/clp_package_utils/scripts/search.py @@ -82,7 +82,7 @@ def main(argv): logger.exception("Failed to load config.") return -1 - container_name = generate_container_name(JobType.SEARCH) + container_name = generate_container_name(str(JobType.SEARCH)) container_clp_config, mounts = generate_container_config(clp_config, clp_home) generated_config_path_on_container, generated_config_path_on_host = dump_container_config( diff --git a/components/package-template/src/sbin/admin_tools/del-archives.sh b/components/package-template/src/sbin/admin-tools/del-archives.sh similarity index 100% rename from components/package-template/src/sbin/admin_tools/del-archives.sh rename to components/package-template/src/sbin/admin-tools/del-archives.sh From 0a318479f6233f0bc81f0fe0a0ad46499e51a5c6 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 25 Nov 2024 22:44:09 -0500 Subject: [PATCH 13/20] linter --- .../clp-package-utils/clp_package_utils/scripts/del_archives.py | 2 +- .../clp_package_utils/scripts/native/del_archives.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index 99bf75568..f4c600e66 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -1,8 +1,8 @@ import argparse import logging -from pathlib import Path import subprocess import sys +from pathlib import Path from clp_package_utils.general import ( CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 281fe34fd..3cc85c2c1 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -1,9 +1,9 @@ import argparse import logging -from pathlib import Path import shutil import sys from contextlib import closing +from pathlib import Path from typing import List from clp_py_utils.clp_config import Database From 9449c52ee66e364ed9d1b815da9cafa9fffdfd1b Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:03:11 -0500 Subject: [PATCH 14/20] Missing change --- .../clp-package-utils/clp_package_utils/scripts/decompress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/decompress.py b/components/clp-package-utils/clp_package_utils/scripts/decompress.py index 29f011379..2cafbc994 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/decompress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/decompress.py @@ -163,7 +163,7 @@ def handle_extract_ir_cmd( if clp_config is None: return -1 - container_name = generate_container_name(JobType.IR_EXTRACTION) + container_name = generate_container_name(str(JobType.IR_EXTRACTION)) container_clp_config, mounts = generate_container_config(clp_config, clp_home) generated_config_path_on_container, generated_config_path_on_host = dump_container_config( container_clp_config, clp_config, container_name From 112797369cf679dfe11597e09b97c37551e364b4 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:11:29 -0500 Subject: [PATCH 15/20] Use centralized logging setup --- .../clp-package-utils/clp_package_utils/__init__.py | 12 ++++++++++++ .../clp_package_utils/scripts/compress.py | 7 ------- .../clp_package_utils/scripts/decompress.py | 9 +-------- .../clp_package_utils/scripts/del_archives.py | 7 ------- .../clp_package_utils/scripts/native/compress.py | 7 ------- .../clp_package_utils/scripts/native/decompress.py | 7 ------- .../clp_package_utils/scripts/native/del_archives.py | 7 ------- .../clp_package_utils/scripts/native/search.py | 7 ------- .../clp_package_utils/scripts/search.py | 7 ------- .../clp_package_utils/scripts/start_clp.py | 9 +-------- .../clp_package_utils/scripts/stop_clp.py | 9 +-------- 11 files changed, 15 insertions(+), 73 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/__init__.py b/components/clp-package-utils/clp_package_utils/__init__.py index e69de29bb..96907ee03 100644 --- a/components/clp-package-utils/clp_package_utils/__init__.py +++ b/components/clp-package-utils/clp_package_utils/__init__.py @@ -0,0 +1,12 @@ +import logging + +# Set up console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter( + "%(asctime)s.%(msecs)03d %(levelname)s [%(module)s] %(message)s", datefmt="%Y-%m-%dT%H:%M:%S" +) +logging_console_handler.setFormatter(logging_formatter) +# Set up root logger +root_logger = logging.getLogger() +root_logger.setLevel(logging.INFO) +root_logger.addHandler(logging_console_handler) diff --git a/components/clp-package-utils/clp_package_utils/scripts/compress.py b/components/clp-package-utils/clp_package_utils/scripts/compress.py index fc6af4853..3cae6228a 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/compress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/compress.py @@ -18,15 +18,8 @@ validate_and_load_db_credentials_file, ) -# Setup logging # Create logger logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) def main(argv): diff --git a/components/clp-package-utils/clp_package_utils/scripts/decompress.py b/components/clp-package-utils/clp_package_utils/scripts/decompress.py index 2cafbc994..d31e2f34a 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/decompress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/decompress.py @@ -24,15 +24,8 @@ validate_path_could_be_dir, ) -# Setup logging # Create logger -logger = logging.getLogger("clp") -logger.setLevel(logging.DEBUG) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) +logger = logging.getLogger(__file__) def validate_and_load_config( diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index f4c600e66..f4f69d02c 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -16,15 +16,8 @@ validate_and_load_db_credentials_file, ) -# Setup logging # Create logger logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) def main(argv): diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/compress.py b/components/clp-package-utils/clp_package_utils/scripts/native/compress.py index cb495204f..ce301ca6a 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/native/compress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/compress.py @@ -23,15 +23,8 @@ load_config_file, ) -# Setup logging # Create logger logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) def print_compression_job_status(job_row, current_time): diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/decompress.py b/components/clp-package-utils/clp_package_utils/scripts/native/decompress.py index b6585b192..c5db8210c 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/native/decompress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/decompress.py @@ -27,15 +27,8 @@ wait_for_query_job, ) -# Setup logging # Create logger logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) def get_orig_file_id(db_config: Database, path: str) -> Optional[str]: diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 3cc85c2c1..0bb54e25f 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -15,15 +15,8 @@ load_config_file, ) -# Setup logging # Create logger logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) def main(argv): diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/search.py b/components/clp-package-utils/clp_package_utils/scripts/native/search.py index 7dd247fa5..655fb2ad8 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/native/search.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/search.py @@ -26,15 +26,8 @@ wait_for_query_job, ) -# Setup logging # Create logger logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) def create_and_monitor_job_in_db( diff --git a/components/clp-package-utils/clp_package_utils/scripts/search.py b/components/clp-package-utils/clp_package_utils/scripts/search.py index 67c561bc3..4c6fb2bba 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/search.py +++ b/components/clp-package-utils/clp_package_utils/scripts/search.py @@ -20,15 +20,8 @@ validate_and_load_db_credentials_file, ) -# Setup logging # Create logger logger = logging.getLogger(__file__) -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) def main(argv): diff --git a/components/clp-package-utils/clp_package_utils/scripts/start_clp.py b/components/clp-package-utils/clp_package_utils/scripts/start_clp.py index 7c6de0200..07719c56b 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/start_clp.py +++ b/components/clp-package-utils/clp_package_utils/scripts/start_clp.py @@ -59,15 +59,8 @@ validate_worker_config, ) -# Setup logging # Create logger -logger = logging.getLogger("clp") -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) +logger = logging.getLogger(__file__) def container_exists(container_name): diff --git a/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py b/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py index f100a098a..69943ca5b 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py +++ b/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py @@ -31,15 +31,8 @@ validate_and_load_queue_credentials_file, ) -# Setup logging # Create logger -logger = logging.getLogger("clp") -logger.setLevel(logging.INFO) -# Setup console logging -logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") -logging_console_handler.setFormatter(logging_formatter) -logger.addHandler(logging_console_handler) +logger = logging.getLogger(__file__) def stop_running_container(container_name: str, already_exited_containers: List[str], force: bool): From 59e0cae009a7d1accbe263dbb15263602152ac10 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:18:04 -0500 Subject: [PATCH 16/20] Remove unnecessary job type --- components/clp-package-utils/clp_package_utils/general.py | 1 - .../clp-package-utils/clp_package_utils/scripts/del_archives.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/general.py b/components/clp-package-utils/clp_package_utils/general.py index 32600a452..f7055b675 100644 --- a/components/clp-package-utils/clp_package_utils/general.py +++ b/components/clp-package-utils/clp_package_utils/general.py @@ -51,7 +51,6 @@ class JobType(KebabCaseStrEnum): FILE_EXTRACTION = auto() IR_EXTRACTION = auto() SEARCH = auto() - DEL_ARCHIVE = auto() class DockerMount: diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index f4f69d02c..3fd9bbe2a 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -69,7 +69,7 @@ def main(argv): logger.error("begin_ts and end_ts must be non-negative.") return -1 - container_name = generate_container_name(str(JobType.DEL_ARCHIVE)) + container_name = generate_container_name("del-archives") container_clp_config, mounts = generate_container_config(clp_config, clp_home) generated_config_path_on_container, generated_config_path_on_host = dump_container_config( From fe76015191f7e54350f63fe04e1c993b44f1bbe8 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:24:04 -0500 Subject: [PATCH 17/20] use templated query --- .../clp_package_utils/scripts/native/del_archives.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 0bb54e25f..73df95922 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -95,10 +95,11 @@ def _delete_archives( ) as db_cursor: db_cursor.execute( f""" - DELETE FROM `{table_prefix}archives` WHERE - begin_timestamp >= {begin_ts} AND end_timestamp <= {end_ts} + DELETE FROM `{table_prefix}archives` + WHERE begin_timestamp >= %s AND end_timestamp <= %s RETURNING id - """ + """, + (begin_ts, end_ts) ) results = db_cursor.fetchall() From e3306dad6613494e7677e9e1908bc35e22b41024 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:24:36 -0500 Subject: [PATCH 18/20] Linter --- .../clp_package_utils/scripts/native/del_archives.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index 73df95922..e75292894 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -99,7 +99,7 @@ def _delete_archives( WHERE begin_timestamp >= %s AND end_timestamp <= %s RETURNING id """, - (begin_ts, end_ts) + (begin_ts, end_ts), ) results = db_cursor.fetchall() From 1548ae3a2ff1069dd9f4d88fada2e39f171accde Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Tue, 26 Nov 2024 19:57:53 -0500 Subject: [PATCH 19/20] Update components/clp-package-utils/clp_package_utils/__init__.py Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- components/clp-package-utils/clp_package_utils/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/components/clp-package-utils/clp_package_utils/__init__.py b/components/clp-package-utils/clp_package_utils/__init__.py index 96907ee03..5253a87e5 100644 --- a/components/clp-package-utils/clp_package_utils/__init__.py +++ b/components/clp-package-utils/clp_package_utils/__init__.py @@ -6,6 +6,7 @@ "%(asctime)s.%(msecs)03d %(levelname)s [%(module)s] %(message)s", datefmt="%Y-%m-%dT%H:%M:%S" ) logging_console_handler.setFormatter(logging_formatter) + # Set up root logger root_logger = logging.getLogger() root_logger.setLevel(logging.INFO) From 73b656b1d93e8379434912a9bdad450cd088722d Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 26 Nov 2024 20:17:07 -0500 Subject: [PATCH 20/20] remove comment --- .../clp-package-utils/clp_package_utils/scripts/compress.py | 1 - .../clp-package-utils/clp_package_utils/scripts/decompress.py | 1 - .../clp-package-utils/clp_package_utils/scripts/del_archives.py | 2 -- .../clp_package_utils/scripts/native/compress.py | 1 - .../clp_package_utils/scripts/native/decompress.py | 1 - .../clp_package_utils/scripts/native/del_archives.py | 1 - .../clp_package_utils/scripts/native/search.py | 1 - .../clp-package-utils/clp_package_utils/scripts/search.py | 1 - .../clp-package-utils/clp_package_utils/scripts/start_clp.py | 1 - .../clp-package-utils/clp_package_utils/scripts/stop_clp.py | 1 - 10 files changed, 11 deletions(-) diff --git a/components/clp-package-utils/clp_package_utils/scripts/compress.py b/components/clp-package-utils/clp_package_utils/scripts/compress.py index 3cae6228a..efd3180ae 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/compress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/compress.py @@ -18,7 +18,6 @@ validate_and_load_db_credentials_file, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/decompress.py b/components/clp-package-utils/clp_package_utils/scripts/decompress.py index d31e2f34a..63fd98b70 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/decompress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/decompress.py @@ -24,7 +24,6 @@ validate_path_could_be_dir, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py index 3fd9bbe2a..54d959771 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/del_archives.py @@ -11,12 +11,10 @@ generate_container_name, generate_container_start_cmd, get_clp_home, - JobType, load_config_file, validate_and_load_db_credentials_file, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/compress.py b/components/clp-package-utils/clp_package_utils/scripts/native/compress.py index ce301ca6a..b6d9bb7eb 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/native/compress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/compress.py @@ -23,7 +23,6 @@ load_config_file, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/decompress.py b/components/clp-package-utils/clp_package_utils/scripts/native/decompress.py index c5db8210c..54ef73d97 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/native/decompress.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/decompress.py @@ -27,7 +27,6 @@ wait_for_query_job, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py index e75292894..735bf299d 100644 --- a/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/del_archives.py @@ -15,7 +15,6 @@ load_config_file, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/native/search.py b/components/clp-package-utils/clp_package_utils/scripts/native/search.py index 655fb2ad8..d166cf35f 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/native/search.py +++ b/components/clp-package-utils/clp_package_utils/scripts/native/search.py @@ -26,7 +26,6 @@ wait_for_query_job, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/search.py b/components/clp-package-utils/clp_package_utils/scripts/search.py index 4c6fb2bba..beb7fb0b0 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/search.py +++ b/components/clp-package-utils/clp_package_utils/scripts/search.py @@ -20,7 +20,6 @@ validate_and_load_db_credentials_file, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/start_clp.py b/components/clp-package-utils/clp_package_utils/scripts/start_clp.py index 07719c56b..641e70531 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/start_clp.py +++ b/components/clp-package-utils/clp_package_utils/scripts/start_clp.py @@ -59,7 +59,6 @@ validate_worker_config, ) -# Create logger logger = logging.getLogger(__file__) diff --git a/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py b/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py index 69943ca5b..a55d7a795 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py +++ b/components/clp-package-utils/clp_package_utils/scripts/stop_clp.py @@ -31,7 +31,6 @@ validate_and_load_queue_credentials_file, ) -# Create logger logger = logging.getLogger(__file__)