From 18e9855ce25df342a56ce14c3d69e883cc07c3a4 Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Sat, 24 Aug 2024 21:13:09 +0900 Subject: [PATCH 1/3] Recreate cursor after long running query --- src/sbosc/controller/validator.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/sbosc/controller/validator.py b/src/sbosc/controller/validator.py index f790d82..0dfb97d 100644 --- a/src/sbosc/controller/validator.py +++ b/src/sbosc/controller/validator.py @@ -268,12 +268,11 @@ def __validate_unmatched_pks(self): def validate_apply_dml_events(self, start_timestamp, end_timestamp): unmatched_pks = [] - with self.db.cursor() as cursor: - cursor: Cursor - - if start_timestamp <= end_timestamp: - self.logger.info(f"Start validating DML events from {start_timestamp} to {end_timestamp}") - for table in ['inserted_pk', 'updated_pk', 'deleted_pk']: + if start_timestamp <= end_timestamp: + self.logger.info(f"Start validating DML events from {start_timestamp} to {end_timestamp}") + for table in ['inserted_pk', 'updated_pk', 'deleted_pk']: + with self.db.cursor() as cursor: + cursor: Cursor cursor.execute(f''' ANALYZE TABLE {config.SBOSC_DB}.{table}_{self.migration_id} ''') @@ -301,14 +300,20 @@ def validate_apply_dml_events(self, start_timestamp, end_timestamp): for thread in threads: thread.result() - cursor.executemany(f''' - INSERT IGNORE INTO {config.SBOSC_DB}.unmatched_rows (source_pk, migration_id, unmatch_type) - VALUES (%s, {self.migration_id}, %s) - ''', unmatched_pks) + with self.db.cursor() as cursor: + cursor: Cursor + cursor.executemany(f''' + INSERT IGNORE INTO {config.SBOSC_DB}.unmatched_rows (source_pk, migration_id, unmatch_type) + VALUES (%s, {self.migration_id}, %s) + ''', unmatched_pks) + self.__validate_unmatched_pks() - cursor.execute( - f"SELECT COUNT(1) FROM {config.SBOSC_DB}.unmatched_rows WHERE migration_id = {self.migration_id}") - unmatched_rows = cursor.fetchone()[0] + + with self.db.cursor() as cursor: + cursor: Cursor + cursor.execute( + f"SELECT COUNT(1) FROM {config.SBOSC_DB}.unmatched_rows WHERE migration_id = {self.migration_id}") + unmatched_rows = cursor.fetchone()[0] # Even though validation logic is based on data in tables following valid condition can be achieved. # All events are being pushed to redis in validation stage. From cd436e9dc22c6eac51549c39916499d5888b1179 Mon Sep 17 00:00:00 2001 From: Peter Lyoo <102126644+peterlyoo@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:48:26 +0900 Subject: [PATCH 2/3] fix preferred window logic --- src/sbosc/component.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sbosc/component.py b/src/sbosc/component.py index 84090ea..e27e5f6 100644 --- a/src/sbosc/component.py +++ b/src/sbosc/component.py @@ -41,6 +41,8 @@ def is_preferred_window(): start_time_str, end_time_str = config.PREFERRED_WINDOW.split('-') start_time = datetime.strptime(start_time_str, '%H:%M').time() end_time = datetime.strptime(end_time_str, '%H:%M').time() + if start_time >= end_time: + return start_time <= current_time or current_time <= end_time return start_time <= current_time <= end_time def get_migration_id(self): From af8fa2938ddf3559348ae87f8c8bc721b4fcc63e Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Sat, 16 Nov 2024 13:09:55 +0900 Subject: [PATCH 3/3] fix bug on duplicate key --- src/sbosc/worker/worker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sbosc/worker/worker.py b/src/sbosc/worker/worker.py index c50474a..1b649f1 100644 --- a/src/sbosc/worker/worker.py +++ b/src/sbosc/worker/worker.py @@ -116,6 +116,10 @@ def bulk_import(self): start_pk = self.get_start_pk(chunk_info) if start_pk is None: return + # If the start_pk is greater than the end_pk, set the last_pk_inserted to end_pk + # This can happen when chunk ended with a duplicate key error + elif start_pk > chunk_info.end_pk: + chunk_info.last_pk_inserted = chunk_info.end_pk end_pk = chunk_info.end_pk chunk_info.status = ChunkStatus.IN_PROGRESS