From adbe13dc3cd2fbef7abb34f86c4f5c804afe660b Mon Sep 17 00:00:00 2001 From: "J.C. Jones" Date: Tue, 13 Sep 2022 09:57:41 -0700 Subject: [PATCH] Set WAIT 2 for all ALTER TABLE REORGANIZE PARTITION calls (#57) * Set WAIT 2 for all ALTER TABLE REORGANIZE PARTITION calls * Ensure alter_time_seconds gets updated even on nonzero SQL exits. Thanks @mcpherrinm * Add a metric for alter_errors and reorder logs to be clearer on large tables --- partitionmanager/cli.py | 29 ++++++++++++++----- partitionmanager/cli_test.py | 16 +++++----- partitionmanager/migrate_test.py | 4 +-- partitionmanager/sql.py | 26 +++++++++++------ partitionmanager/table_append_partition.py | 2 +- .../table_append_partition_test.py | 20 ++++++------- partitionmanager/types.py | 4 +++ 7 files changed, 65 insertions(+), 36 deletions(-) diff --git a/partitionmanager/cli.py b/partitionmanager/cli.py index 1dd5ef0..6e319ff 100644 --- a/partitionmanager/cli.py +++ b/partitionmanager/cli.py @@ -296,9 +296,15 @@ def do_partition(conf): help_text="Time in seconds to complete the ALTER command", type_name="gauge", ) + metrics.describe( + "alter_errors", + help_text="Number of errors observed during ALTER commands", + type_name="counter", + ) all_results = dict() for table in conf.tables: + time_start = None try: table_problems = pm_tap.get_table_compatibility_problems(conf.dbcmd, table) if table_problems: @@ -311,11 +317,13 @@ def do_partition(conf): if table.partition_period: duration = table.partition_period + log.info(f"Evaluating {table} (duration={duration})") + positions = pm_tap.get_current_positions( conf.dbcmd, table, map_data["range_cols"] ) - log.info(f"Evaluating {table} (duration={duration}) (pos={positions})") + log.info(f"{table} (pos={positions})") cur_pos = partitionmanager.types.Position() cur_pos.set_position([positions[col] for col in map_data["range_cols"]]) @@ -344,22 +352,29 @@ def do_partition(conf): log.info(f"{table} running SQL: {composite_sql_command}") time_start = datetime.utcnow() output = conf.dbcmd.run(composite_sql_command) - time_end = datetime.utcnow() all_results[table.name] = {"sql": composite_sql_command, "output": output} log.info(f"{table} results: {output}") - metrics.add( - "alter_time_seconds", - table.name, - (time_end - time_start).total_seconds(), - ) + except partitionmanager.types.NoEmptyPartitionsAvailableException: log.warning( f"Unable to automatically handle {table}: No empty " "partition is available." ) + except partitionmanager.types.DatabaseCommandException as e: + log.warning("Failed to automatically handle %s: %s", table, e) + metrics.add("alter_errors", table.name, 1) except (ValueError, Exception) as e: log.warning("Failed to handle %s: %s", table, e) + metrics.add("alter_errors", table.name, 1) + + time_end = datetime.utcnow() + if time_start: + metrics.add( + "alter_time_seconds", + table.name, + (time_end - time_start).total_seconds(), + ) if conf.prometheus_stats_path: do_stats(conf, metrics=metrics) diff --git a/partitionmanager/cli_test.py b/partitionmanager/cli_test.py index c58c7c4..4308187 100644 --- a/partitionmanager/cli_test.py +++ b/partitionmanager/cli_test.py @@ -80,7 +80,7 @@ def test_partition_cmd_noop(self): { "testtable_noop": { "sql": ( - "ALTER TABLE `testtable_noop` REORGANIZE PARTITION " + "ALTER TABLE `testtable_noop` WAIT 2 REORGANIZE PARTITION " "`p_20201204` INTO " "(PARTITION `p_20201112` VALUES LESS THAN (548), " "PARTITION `p_20201212` VALUES LESS THAN MAXVALUE);" @@ -102,7 +102,7 @@ def test_partition_cmd_final(self): "testtable_commit": { "output": [], "sql": ( - "ALTER TABLE `testtable_commit` REORGANIZE PARTITION " + "ALTER TABLE `testtable_commit` WAIT 2 REORGANIZE PARTITION " "`p_20201204` INTO " "(PARTITION `p_20201112` VALUES LESS THAN (548), " "PARTITION `p_20201212` VALUES LESS THAN MAXVALUE);" @@ -223,10 +223,12 @@ def test_partition_period_seven_days(self): set( [ "INFO:partition:Evaluating Table partitioned_last_week " - "(duration=7 days, 0:00:00) (pos={'id': 150})", + "(duration=7 days, 0:00:00)", + "INFO:partition:Table partitioned_last_week (pos={'id': 150})", "DEBUG:partition:Table partitioned_last_week has no pending SQL updates.", "INFO:partition:Evaluating Table partitioned_yesterday " - "(duration=7 days, 0:00:00) (pos={'id': 150})", + "(duration=7 days, 0:00:00)", + "INFO:partition:Table partitioned_yesterday (pos={'id': 150})", "DEBUG:partition:Table partitioned_yesterday has no pending SQL updates.", ] ), @@ -500,7 +502,7 @@ def test_migrate_cmd_in(self): + "PARTITION BY RANGE (id) (", "\tPARTITION p_assumed VALUES LESS THAN MAXVALUE", ");", - "ALTER TABLE `partitioned_yesterday_new_20210421` " + "ALTER TABLE `partitioned_yesterday_new_20210421` WAIT 2 " + "REORGANIZE PARTITION `p_assumed` INTO (PARTITION " + "`p_20210421` VALUES LESS THAN (150), PARTITION " + "`p_20210521` VALUES LESS THAN (300), PARTITION " @@ -525,7 +527,7 @@ def test_migrate_cmd_in(self): "ALTER TABLE two_new_20210421 PARTITION BY RANGE (id) (", "\tPARTITION p_assumed VALUES LESS THAN MAXVALUE", ");", - "ALTER TABLE `two_new_20210421` REORGANIZE PARTITION " + "ALTER TABLE `two_new_20210421` WAIT 2 REORGANIZE PARTITION " + "`p_assumed` INTO (PARTITION `p_20210421` VALUES " + "LESS THAN (150), PARTITION `p_20210521` VALUES LESS " + "THAN (375), PARTITION `p_20210620` VALUES LESS THAN " @@ -585,7 +587,7 @@ def test_migrate_cmd_in_unpartitioned_with_override(self): "ALTER TABLE unpartitioned_new_20210421 PARTITION BY RANGE (id) (", "\tPARTITION p_assumed VALUES LESS THAN MAXVALUE", ");", - "ALTER TABLE `unpartitioned_new_20210421` REORGANIZE " + "ALTER TABLE `unpartitioned_new_20210421` WAIT 2 REORGANIZE " + "PARTITION `p_assumed` INTO (PARTITION `p_20210421` " + "VALUES LESS THAN (150), PARTITION `p_20210521` VALUES " + "LESS THAN (300), PARTITION `p_20210620` VALUES LESS " diff --git a/partitionmanager/migrate_test.py b/partitionmanager/migrate_test.py index d834b3e..9ee39d3 100644 --- a/partitionmanager/migrate_test.py +++ b/partitionmanager/migrate_test.py @@ -131,7 +131,7 @@ def test_read_state_info(self): "ALTER TABLE test_new_20210303 PARTITION BY RANGE (id) (", "\tPARTITION p_start VALUES LESS THAN MAXVALUE", ");", - "ALTER TABLE `test_new_20210303` REORGANIZE PARTITION `p_start` " + "ALTER TABLE `test_new_20210303` WAIT 2 REORGANIZE PARTITION `p_start` " + "INTO (PARTITION `p_20210303` VALUES LESS THAN (156), " + "PARTITION `p_20210402` VALUES LESS THAN (2406), PARTITION " + "`p_20210502` VALUES LESS THAN MAXVALUE);", @@ -187,7 +187,7 @@ def test_read_state_info_map_table(self): + "COLUMNS (orderID, authzID) (", "\tPARTITION p_assumed VALUES LESS THAN (MAXVALUE, MAXVALUE)", ");", - "ALTER TABLE `map_table_new_20210303` REORGANIZE PARTITION " + "ALTER TABLE `map_table_new_20210303` WAIT 2 REORGANIZE PARTITION " + "`p_assumed` INTO (PARTITION `p_20210303` VALUES LESS THAN " + "(11, 22), PARTITION `p_20210402` VALUES LESS THAN " + "(41, 82), PARTITION `p_20210502` VALUES LESS THAN " diff --git a/partitionmanager/sql.py b/partitionmanager/sql.py index be49d5f..8ef9a3b 100644 --- a/partitionmanager/sql.py +++ b/partitionmanager/sql.py @@ -123,15 +123,23 @@ def __init__(self, exe): def run(self, sql_cmd): logging.debug(f"SubprocessDatabaseCommand executing {sql_cmd}") - result = subprocess.run( - [self.exe, "-X"], - input=sql_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - encoding="UTF-8", - check=True, - ) - return XmlResult().parse(result.stdout) + try: + result = subprocess.run( + [self.exe, "-X"], + input=sql_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + encoding="UTF-8", + check=True, + ) + return XmlResult().parse(result.stdout) + except subprocess.CalledProcessError as cpe: + logging.error( + "SubprocessDatabaseCommand failed, error code %d", cpe.returncode + ) + logging.error("stdout: %s", cpe.stdout) + logging.error("stderr: %s", cpe.stderr) + raise partitionmanager.types.DatabaseCommandException(cpe.stderr) def db_name(self): rows = self.run("SELECT DATABASE();") diff --git a/partitionmanager/table_append_partition.py b/partitionmanager/table_append_partition.py index d2ad097..eb3617b 100644 --- a/partitionmanager/table_append_partition.py +++ b/partitionmanager/table_append_partition.py @@ -729,7 +729,7 @@ def generate_sql_reorganize_partition_commands(table, changes): partition_update = ", ".join(partition_strings) alter_cmd = ( - f"ALTER TABLE `{table.name}` " + f"ALTER TABLE `{table.name}` WAIT 2 " f"REORGANIZE PARTITION `{modified_partition.old.name}` INTO ({partition_update});" ) diff --git a/partitionmanager/table_append_partition_test.py b/partitionmanager/table_append_partition_test.py index 45cb6d9..6b3f308 100644 --- a/partitionmanager/table_append_partition_test.py +++ b/partitionmanager/table_append_partition_test.py @@ -669,7 +669,7 @@ def test_plan_partition_changes_short_names(self): self.assertEqual( output, [ - "ALTER TABLE `table` REORGANIZE PARTITION `p_future` INTO " + "ALTER TABLE `table` WAIT 2 REORGANIZE PARTITION `p_future` INTO " "(PARTITION `p_20210201` VALUES LESS THAN (12960433003), " "PARTITION `p_20210208` VALUES LESS THAN MAXVALUE);" ], @@ -705,7 +705,7 @@ def test_plan_partition_changes_bespoke_names(self): self.assertEqual( output, [ - "ALTER TABLE `table` REORGANIZE PARTITION `p_future` INTO " + "ALTER TABLE `table` WAIT 2 REORGANIZE PARTITION `p_future` INTO " "(PARTITION `p_20210108` VALUES LESS THAN (170), " "PARTITION `p_20210115` VALUES LESS THAN MAXVALUE);" ], @@ -1007,7 +1007,7 @@ def testgenerate_sql_reorganize_partition_commands_single_change(self): ) ), [ - "ALTER TABLE `table` REORGANIZE PARTITION `p_20210102` INTO " + "ALTER TABLE `table` WAIT 2 REORGANIZE PARTITION `p_20210102` INTO " "(PARTITION `p_20210116` VALUES LESS THAN (542, 190));" ], ) @@ -1028,9 +1028,9 @@ def testgenerate_sql_reorganize_partition_commands_two_changes(self): ) ), [ - "ALTER TABLE `table` REORGANIZE PARTITION `p_20210120` INTO " + "ALTER TABLE `table` WAIT 2 REORGANIZE PARTITION `p_20210120` INTO " "(PARTITION `p_20210214` VALUES LESS THAN (2000));", - "ALTER TABLE `table` REORGANIZE PARTITION `p_20210102` INTO " + "ALTER TABLE `table` WAIT 2 REORGANIZE PARTITION `p_20210102` INTO " "(PARTITION `p_20210116` VALUES LESS THAN (500));", ], ) @@ -1052,7 +1052,7 @@ def testgenerate_sql_reorganize_partition_commands_new_partitions(self): ) ), [ - "ALTER TABLE `table` REORGANIZE PARTITION `p_20210102` INTO " + "ALTER TABLE `table` WAIT 2 REORGANIZE PARTITION `p_20210102` INTO " "(PARTITION `p_20210102` VALUES LESS THAN (200), " "PARTITION `p_20210116` VALUES LESS THAN (542), " "PARTITION `p_20210123` VALUES LESS THAN (662));" @@ -1081,7 +1081,7 @@ def testgenerate_sql_reorganize_partition_commands_maintain_new_partition(self): ) ), [ - "ALTER TABLE `table` REORGANIZE PARTITION `future` INTO " + "ALTER TABLE `table` WAIT 2 REORGANIZE PARTITION `future` INTO " "(PARTITION `p_20210114` VALUES LESS THAN (800), " "PARTITION `p_20210116` VALUES LESS THAN (1000), " "PARTITION `p_20210123` VALUES LESS THAN (1200), " @@ -1147,9 +1147,9 @@ def test_plan_andgenerate_sql_reorganize_partition_commands_with_future_partitio self.assertEqual( list(generate_sql_reorganize_partition_commands(Table("water"), planned)), [ - "ALTER TABLE `water` REORGANIZE PARTITION `future` INTO " + "ALTER TABLE `water` WAIT 2 REORGANIZE PARTITION `future` INTO " "(PARTITION `p_20210105` VALUES LESS THAN MAXVALUE);", - "ALTER TABLE `water` REORGANIZE PARTITION `p_20210104` INTO " + "ALTER TABLE `water` WAIT 2 REORGANIZE PARTITION `p_20210104` INTO " "(PARTITION `p_20210102` VALUES LESS THAN (200));", ], ) @@ -1211,7 +1211,7 @@ def test_get_pending_sql_reorganize_partition_commands_with_changes(self): self.assertEqual( list(cmds), [ - "ALTER TABLE `plushies` REORGANIZE PARTITION `future` INTO " + "ALTER TABLE `plushies` WAIT 2 REORGANIZE PARTITION `future` INTO " "(PARTITION `p_20210104` VALUES LESS THAN (550), " "PARTITION `p_20210111` VALUES LESS THAN (900), " "PARTITION `p_20210118` VALUES LESS THAN MAXVALUE);" diff --git a/partitionmanager/types.py b/partitionmanager/types.py index 687e1a8..5f7d9d8 100644 --- a/partitionmanager/types.py +++ b/partitionmanager/types.py @@ -603,3 +603,7 @@ class TableInformationException(Exception): class NoEmptyPartitionsAvailableException(Exception): """Raised if no empty partitions are available to safely modify.""" + + +class DatabaseCommandException(Exception): + """Raised if the database command failed."""