From 69ee253b94b39b184268facb53ecfee9e2516c4d Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Thu, 22 Aug 2024 15:49:11 +0900 Subject: [PATCH 01/11] Add docker-compose file --- {charts => deploy/charts}/Chart.yaml | 0 .../charts}/templates/externalsecret.yaml | 0 .../charts}/templates/redis.yaml | 3 -- .../charts}/templates/sb-osc.yaml | 0 .../charts}/templates/serviceaccount.yaml | 0 {charts => deploy/charts}/values.yaml | 0 deploy/compose/config.yaml | 0 deploy/compose/docker-compose.yml | 50 +++++++++++++++++++ deploy/compose/redis.conf | 3 ++ deploy/compose/secret.json | 0 doc/usage.md | 2 +- src/config/env.py | 4 +- 12 files changed, 57 insertions(+), 5 deletions(-) rename {charts => deploy/charts}/Chart.yaml (100%) rename {charts => deploy/charts}/templates/externalsecret.yaml (100%) rename {charts => deploy/charts}/templates/redis.yaml (95%) rename {charts => deploy/charts}/templates/sb-osc.yaml (100%) rename {charts => deploy/charts}/templates/serviceaccount.yaml (100%) rename {charts => deploy/charts}/values.yaml (100%) create mode 100644 deploy/compose/config.yaml create mode 100644 deploy/compose/docker-compose.yml create mode 100644 deploy/compose/redis.conf create mode 100644 deploy/compose/secret.json diff --git a/charts/Chart.yaml b/deploy/charts/Chart.yaml similarity index 100% rename from charts/Chart.yaml rename to deploy/charts/Chart.yaml diff --git a/charts/templates/externalsecret.yaml b/deploy/charts/templates/externalsecret.yaml similarity index 100% rename from charts/templates/externalsecret.yaml rename to deploy/charts/templates/externalsecret.yaml diff --git a/charts/templates/redis.yaml b/deploy/charts/templates/redis.yaml similarity index 95% rename from charts/templates/redis.yaml rename to deploy/charts/templates/redis.yaml index 3edfafa..f2797ea 100644 --- a/charts/templates/redis.yaml +++ b/deploy/charts/templates/redis.yaml @@ -42,9 +42,6 @@ spec: - name: redis-data persistentVolumeClaim: claimName: redis-pvc - - name: redis-config 
- configMap: - name: redis-config - name: redis-secret secret: secretName: sb-osc-secret diff --git a/charts/templates/sb-osc.yaml b/deploy/charts/templates/sb-osc.yaml similarity index 100% rename from charts/templates/sb-osc.yaml rename to deploy/charts/templates/sb-osc.yaml diff --git a/charts/templates/serviceaccount.yaml b/deploy/charts/templates/serviceaccount.yaml similarity index 100% rename from charts/templates/serviceaccount.yaml rename to deploy/charts/templates/serviceaccount.yaml diff --git a/charts/values.yaml b/deploy/charts/values.yaml similarity index 100% rename from charts/values.yaml rename to deploy/charts/values.yaml diff --git a/deploy/compose/config.yaml b/deploy/compose/config.yaml new file mode 100644 index 0000000..e69de29 diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml new file mode 100644 index 0000000..63ede26 --- /dev/null +++ b/deploy/compose/docker-compose.yml @@ -0,0 +1,50 @@ +services: + controller: &component-base + image: "" # SB-OSC image + container_name: controller + environment: &component-env + AWS_REGION: "" # AWS region + CONFIG_FILE: "/opt/sb-osc/config.yaml" + SECRET_FILE: "/opt/sb-osc/secret.json" + volumes: + - ./config.yaml:/opt/sb-osc/config.yaml + - ./secret.json:/opt/sb-osc/secret.json + command: ["python", "-m", "sbosc.controller.main"] + depends_on: + - redis + + eventhandler: + <<: *component-base + container_name: eventhandler + command: ["python", "-m", "sbosc.eventhandler.main"] + depends_on: + - controller + + monitor: + <<: *component-base + container_name: monitor + command: ["python", "-m", "sbosc.monitor.main"] + depends_on: + - controller + + worker: + <<: *component-base + container_name: worker + command: ["python", "-m", "sbosc.worker.main"] + environment: + <<: *component-env + POD_NAME: "worker" + depends_on: + - controller + + redis: + image: "redis:7.0.4" + container_name: redis + ports: + - "6379:6379" + volumes: + - redis-data:/data + - 
./redis.conf:/usr/local/etc/redis/redis.conf + +volumes: + redis-data: diff --git a/deploy/compose/redis.conf b/deploy/compose/redis.conf new file mode 100644 index 0000000..44e5907 --- /dev/null +++ b/deploy/compose/redis.conf @@ -0,0 +1,3 @@ +requirepass "" +appendonly yes +save "" diff --git a/deploy/compose/secret.json b/deploy/compose/secret.json new file mode 100644 index 0000000..e69de29 diff --git a/doc/usage.md b/doc/usage.md index c633639..1fc8a08 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -67,7 +67,7 @@ SB-OSC requires binlog to be enabled on the source database. Please set `binlog_ - Set `range_optimizer_max_mem_size` to `0` or a large value to prevent bad query plans on queries with large `IN` clauses (especially on Aurora v3) ## 4. Run SB-OSC -When all of the above steps are completed, you can start the migration process by installing the [helm chart](../charts) +When all of the above steps are completed, you can start the migration process by installing the [helm chart](../deploy/charts) ```bash helm install charts sb-osc -n sb-osc --create-namespace diff --git a/src/config/env.py b/src/config/env.py index d8268bc..1c2d1ea 100644 --- a/src/config/env.py +++ b/src/config/env.py @@ -10,7 +10,9 @@ class Env: Default values set in the annotations are used if the environment variable is not set. """ AWS_REGION: str = 'ap-northeast-2' - POD_NAME: str = 'local' # POD_NAME = 'local' will determine whether it's running in a local environment or not. + # POD_NAME is used to uniquely define worker metric key for each worker instance. + # Manually set this unique for each worker if not running in k8s. 
+ POD_NAME: str = 'local' CONFIG_FILE: str = '/opt/sb-osc/config.yaml' SECRET_FILE: str = '/opt/sb-osc/secret.json' From 2d72d8258ba7ed0098719c17cbd7abeecd0af296 Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Thu, 22 Aug 2024 16:38:03 +0900 Subject: [PATCH 02/11] Add sample secret and config --- deploy/compose/config.yaml | 43 +++++++++++++++++++++++++++++++ deploy/compose/docker-compose.yml | 4 +++ deploy/compose/secret.json | 9 +++++++ tests/configs/config.yaml | 12 ++++----- 4 files changed, 62 insertions(+), 6 deletions(-) diff --git a/deploy/compose/config.yaml b/deploy/compose/config.yaml index e69de29..2aff5f1 100644 --- a/deploy/compose/config.yaml +++ b/deploy/compose/config.yaml @@ -0,0 +1,43 @@ +auto_swap: false # Whether to swap tables automatically +preferred_window: "00:00-23:59" # Preferred window for swapping tables & bulk import validation + +source_writer_endpoint: "" +source_reader_endpoint: "" +destination_writer_endpoint: "" +destination_reader_endpoint: "" + +min_chunk_size: 100000 +max_chunk_count: 200 + +# Worker +min_batch_size: 200 +max_batch_size: 1000 +batch_size_step_size: 200 + +min_thread_count: 1 +max_thread_count: 8 +thread_count_step_size: 1 + +commit_interval_in_seconds: 1 +use_batch_size_multiplier: true + +# Monitor +cpu_soft_threshold: 40 # Soft threshold for CPU usage. If the CPU usage exceeds this value, thread count will be decreased into half. +cpu_hard_threshold: 60 # Hard threshold for CPU usage. If the CPU usage exceeds this value, thread count will be decreased to 0. +write_latency_soft_threshold: 30 # Soft threshold for WriteLatency. If the latency exceeds this value, batch size will be decreased into half. +write_latency_hard_threshold: 50 # Hard threshold for WriteLatency. If the latency exceeds this value, batch size will be decreased to 0. + +# EventHandler +eventhandler_thread_count: 4 # Number of threads for EventHandler. Max number of binlog files to read at once. 
(Max 4 recommended) +eventhandler_thread_timeout_in_seconds: 300 # Timeout for EventHandler thread. If the thread is not finished within this time, it raises exception and restarts EventHandler. + +# EventLoader +pk_set_max_size: 1000000 # Max number of DML PKs to load from DB at once. No more than 2 * pk_set_max_size will be kept in Redis. This is used for memory optimization. +event_batch_duration_in_seconds: 3600 # Timestamp range of DML events to load from DB at once (seconds). + +# Validator +bulk_import_validation_batch_size: 1000000 # Batch size for bulk import validation +apply_dml_events_validation_batch_size: 3000 # Batch size for DML event validation +apply_dml_events_validation_interval_in_seconds: 10 # Interval for DML event validation (seconds) +full_dml_event_validation_interval_in_hours: 1 # Interval for full DML event validation (hours) +validation_thread_count: 4 # Number of threads to use for validation process diff --git a/deploy/compose/docker-compose.yml b/deploy/compose/docker-compose.yml index 63ede26..c177f8e 100644 --- a/deploy/compose/docker-compose.yml +++ b/deploy/compose/docker-compose.yml @@ -10,6 +10,7 @@ services: - ./config.yaml:/opt/sb-osc/config.yaml - ./secret.json:/opt/sb-osc/secret.json command: ["python", "-m", "sbosc.controller.main"] + restart: always depends_on: - redis @@ -40,6 +41,9 @@ services: redis: image: "redis:7.0.4" container_name: redis + command: + - redis-server + - /usr/local/etc/redis/redis.conf ports: - "6379:6379" volumes: diff --git a/deploy/compose/secret.json b/deploy/compose/secret.json index e69de29..bf3aceb 100644 --- a/deploy/compose/secret.json +++ b/deploy/compose/secret.json @@ -0,0 +1,9 @@ +{ + "username": "root", + "password": "", + "port": "3306", + "redis_host": "127.0.0.1", + "redis_password": "", + "slack_channel": "", + "slack_token": "" +} diff --git a/tests/configs/config.yaml b/tests/configs/config.yaml index af6674b..2950955 100644 --- a/tests/configs/config.yaml +++ 
b/tests/configs/config.yaml @@ -26,26 +26,26 @@ max_batch_size: 10000 min_thread_count: 4 thread_count_step_size: 4 max_thread_count: 8 -commit_interval: 0.01 +commit_interval_in_seconds: 0.01 optimal_value_use_limit: 10 use_batch_size_multiplier: False # EventHandler config event_handler_thread_count: 4 -event_handler_thread_timeout: 300 +event_handler_thread_timeout_in_seconds: 300 # Threshold cpu_soft_threshold: 70 cpu_hard_threshold: 90 -latency_soft_threshold: 20 -latency_hard_threshold: 50 +write_latency_soft_threshold: 20 +write_latency_hard_threshold: 50 # Validation bulk_import_validation_batch_size: 1000 apply_dml_events_validation_batch_size: 1000 validation_thread_count: 10 -full_dml_event_validation_interval: 1 +full_dml_event_validation_interval_in_hours: 1 # DML event loading pk_set_max_size: 10000 -event_batch_duration: 1 +event_batch_duration_in_seconds: 1 From 8c680ea578c7e8012a1123c9709549a706434d00 Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Thu, 22 Aug 2024 17:18:04 +0900 Subject: [PATCH 03/11] Add documentation --- .github/workflows/linters.yml | 5 +- README.md | 2 +- deploy/README.md | 12 ++++ doc/usage.md => deploy/charts/README.md | 6 +- deploy/compose/README.md | 64 +++++++++++++++++++ tests/README.md | 17 +++++ .../docker-compose.yml | 4 +- 7 files changed, 101 insertions(+), 9 deletions(-) create mode 100644 deploy/README.md rename doc/usage.md => deploy/charts/README.md (93%) create mode 100644 deploy/compose/README.md create mode 100644 tests/README.md rename docker-compose.yml => tests/docker-compose.yml (81%) diff --git a/.github/workflows/linters.yml b/.github/workflows/linters.yml index a34a2b8..fe25320 100644 --- a/.github/workflows/linters.yml +++ b/.github/workflows/linters.yml @@ -1,10 +1,11 @@ name: Linters on: - pull_request: push: branches: - - master + - main + pull_request: + workflow_dispatch: jobs: flake8_py3: diff --git a/README.md b/README.md index 194dc4d..c1fdb7b 100644 --- a/README.md +++ b/README.md @@ -74,7 
+74,7 @@ processing its own binlog events. - `binlog_format` set to `ROW` - `binlog-ignore-db` set to `sbosc` (Recommended) -Detailed requirements and setup instructions can be found in the [usage guide](doc/usage.md). +Detailed requirements and setup instructions can be found in the [usage guide](deploy/README). ## Performance diff --git a/deploy/README.md b/deploy/README.md new file mode 100644 index 0000000..79993f3 --- /dev/null +++ b/deploy/README.md @@ -0,0 +1,12 @@ +# Usage Guide + +SB-OSC is designed to be deployed as a containerized application. +It can be run on both Kubernetes and Docker environments. + +For Kubernetes deployment refer to [charts](./charts) directory, and for Docker deployment refer to [compose](./compose) directory. + +### Building Docker Image +You can build Docker image using Dockerfile in the root directory. +```bash +docker build -t sb-osc . +``` diff --git a/doc/usage.md b/deploy/charts/README.md similarity index 93% rename from doc/usage.md rename to deploy/charts/README.md index 1fc8a08..3a626ea 100644 --- a/doc/usage.md +++ b/deploy/charts/README.md @@ -1,4 +1,4 @@ -# Usage +# Deploying on EKS Cluster ## 1. Create AWS Resources @@ -54,7 +54,7 @@ SB-OSC uses ExternalSecrets with SecretsManager for credentials. Following keys - `slack_channel`: Slack channel ID (Optional) – `slack_token`: Slack app token (Optional) -You can find these keys in [secret.py](../src/config/secret.py) +You can find these keys in [secret.py](../../src/config/secret.py) ## 2. Create Destination Table SB-OSC does not create destination table on its own. Table should be manually created before starting migration. @@ -67,7 +67,7 @@ SB-OSC requires binlog to be enabled on the source database. Please set `binlog_ - Set `range_optimizer_max_mem_size` to `0` or a large value to prevent bad query plans on queries with large `IN` clauses (especially on Aurora v3) ## 4. 
Run SB-OSC -When all of the above steps are completed, you can start the migration process by installing the [helm chart](../deploy/charts) +When all of the above steps are completed, you can start the migration process by installing the [helm chart]() ```bash helm install charts sb-osc -n sb-osc --create-namespace diff --git a/deploy/compose/README.md b/deploy/compose/README.md new file mode 100644 index 0000000..1928998 --- /dev/null +++ b/deploy/compose/README.md @@ -0,0 +1,64 @@ +# Deploying with Docker Compose + +## 1. Create IAM Role + +### IAM Role + +IAM role is required for the `monitor` to access CloudWatch metrics. + +Create an IAM role with the following policy: +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "cloudwatch:GetMetricStatistics" + ], + "Resource": "*" + } + ] +} +``` + +Attach this role to the instance where SB-OSC is running. + +## 2. Write Config Files +You have to write three config files for SB-OSC to run properly. + +### `config.yaml` +This files contains the configuration for SB-OSC. You can find the template in [config.yaml](config.yaml). +All values are loaded into `Config` class in [config.py](../../src/config/config.py). + +### `secret.json` +This file contains the credentials for the database, redis, and slack. You can find the template in [secret.json](secret.json). All values are loaded into `Secret` class in [secret.py](../../src/config/secret.py). + +- `username`: Database username +- `password`: Database password +- `port`: Database port +- `redis_host`: Redis endpoint (k8s Service name) +- `redis_password`: Redis password +- `slack_channel`: Slack channel ID (Optional) +– `slack_token`: Slack app token (Optional) + +### `redis.conf` +This file contains the configuration for the Redis server. You can find the template in [redis.conf](redis.conf). 
+- `requirepass ""`: Match the `redis_password` set in `secret.json` +- `appendonly yes`: Enable AOF persistence +- `save ""`: Disable RDB persistence + +## 3. Create Destination Table +SB-OSC does not create destination table on its own. Table should be manually created before starting migration. + +## 4. Enable Binlog +SB-OSC requires binlog to be enabled on the source database. Please set `binlog_format` to `ROW` + +### Other Parameters +- Setting `binlog-ignore-db` to `sbosc` is recommended to prevent SB-OSC from processing its own binlog events. +- Set `range_optimizer_max_mem_size` to `0` or a large value to prevent bad query plans on queries with large `IN` clauses (especially on Aurora v3) + +## 5. Run SB-OSC +When all of the above steps are completed, you can start the migration process by running docker compose. + +Please double-check if the `docker-compose.yml` file is correctly configured (ex. `image`, `AWS_REGION`, etc.) diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..326c34f --- /dev/null +++ b/tests/README.md @@ -0,0 +1,17 @@ +# Testing + +You can run tests on both local and GitHub actions. +`flake8` is used for linting and `pytest` is used for unit tests. + +### Local Environment +To run tests locally, you need to have `docker` and `docker-compose` installed. +`docker-compose` runs a MySQL container and a Redis container for testing. + +On root directory, run the following command: +```bash +docker-compose -f tests/docker-compose.yml up -d +python -m pytest tests +``` + +### GitHub Actions +GitHub Actions runs tests on pull requests and pushes to the main branch. 
diff --git a/docker-compose.yml b/tests/docker-compose.yml similarity index 81% rename from docker-compose.yml rename to tests/docker-compose.yml index 8acc8ab..a375eab 100644 --- a/docker-compose.yml +++ b/tests/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.5' - services: mysql: image: "mysql:8.0.34" @@ -7,7 +5,7 @@ services: ports: - "3306:3306" volumes: - - ./tests/configs/my.cnf:/etc/mysql/conf.d/my.cnf + - ./configs/my.cnf:/etc/mysql/conf.d/my.cnf environment: MYSQL_ALLOW_EMPTY_PASSWORD: 1 MYSQL_ROOT_HOST: "%" From ca578bbfc2fb3a7d557b5bf945d00941f1265d90 Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Thu, 22 Aug 2024 17:21:37 +0900 Subject: [PATCH 04/11] Fix file path --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c1fdb7b..345fa5b 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ processing its own binlog events. - `binlog_format` set to `ROW` - `binlog-ignore-db` set to `sbosc` (Recommended) -Detailed requirements and setup instructions can be found in the [usage guide](deploy/README). +Detailed requirements and setup instructions can be found in the [usage guide](deploy/README.md). 
## Performance From 1e1f6c623bb9e429aea637df3401ee194a9925ae Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Thu, 22 Aug 2024 17:39:21 +0900 Subject: [PATCH 05/11] Add table alias --- src/sbosc/operations/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sbosc/operations/base.py b/src/sbosc/operations/base.py index 3ad0ebc..752de21 100644 --- a/src/sbosc/operations/base.py +++ b/src/sbosc/operations/base.py @@ -11,7 +11,7 @@ def _insert_batch_query(self, start_pk, end_pk): return f''' INSERT INTO {self.destination_db}.{self.destination_table}({self.source_columns}) SELECT {self.source_columns} - FROM {self.source_db}.{self.source_table} + FROM {self.source_db}.{self.source_table} AS source WHERE id BETWEEN {start_pk} AND {end_pk} ''' @@ -132,7 +132,7 @@ class CrossClusterBaseOperation(MigrationOperation): def _select_batch_query(self, start_pk, end_pk): return f''' SELECT {self.source_columns} - FROM {self.source_db}.{self.source_table} + FROM {self.source_db}.{self.source_table} AS source WHERE id BETWEEN {start_pk} AND {end_pk} ''' From 8d19427e21f2abd016ff439864d18ffeea9f2b2a Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Thu, 22 Aug 2024 17:43:06 +0900 Subject: [PATCH 06/11] add PYTHONPATH to testing docs --- tests/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/README.md b/tests/README.md index 326c34f..0b00d78 100644 --- a/tests/README.md +++ b/tests/README.md @@ -10,6 +10,7 @@ To run tests locally, you need to have `docker` and `docker-compose` installed. 
On root directory, run the following command: ```bash docker-compose -f tests/docker-compose.yml up -d +export PYTHONPATH="$(pwd)/src/" python -m pytest tests ``` From bf779e6f47e1ce85ef0aa051ee0b308e73878e6e Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Thu, 5 Sep 2024 17:17:16 +0900 Subject: [PATCH 07/11] Change default configs, simplify default config --- README.md | 9 +--- deploy/compose/README.md | 6 ++- deploy/compose/config.yaml | 99 ++++++++++++++++++++++++++------------ deploy/compose/secret.json | 2 +- src/config/config.py | 38 +++++++-------- 5 files changed, 92 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 345fa5b..e149114 100644 --- a/README.md +++ b/README.md @@ -59,14 +59,7 @@ load when production traffic increases. ## Requirements -SB-OSC is designed to work with Aurora MySQL database, and it's an EKS-based tool. - -It requires the following resources to run: - -- Aurora MySQL database (v2, v3) -- EKS cluster -- AWS SecretsManager secret -- IAM role +SB-OSC is designed to work with Aurora MySQL database. It's a containerized application that can be run on both Kubernetes and Docker environments. SB-OSC accepts `ROW` for binlog format. It is recommended to set `binlog-ignore-db` to `sbosc` to prevent SB-OSC from processing its own binlog events. diff --git a/deploy/compose/README.md b/deploy/compose/README.md index 1928998..3fec4d4 100644 --- a/deploy/compose/README.md +++ b/deploy/compose/README.md @@ -37,11 +37,13 @@ This file contains the credentials for the database, redis, and slack. You can f - `username`: Database username - `password`: Database password - `port`: Database port -- `redis_host`: Redis endpoint (k8s Service name) -- `redis_password`: Redis password +- `redis_host`: Redis endpoint (Docker container name) +- `redis_password`: Redis password (Optional) - `slack_channel`: Slack channel ID (Optional) – `slack_token`: Slack app token (Optional) +`redis_password` is optional. 
Keep in mind that if you set a password in `redis.conf`, you should set the same password in `secret.json`. + ### `redis.conf` This file contains the configuration for the Redis server. You can find the template in [redis.conf](redis.conf). - `requirepass ""`: Match the `redis_password` set in `secret.json` diff --git a/deploy/compose/config.yaml b/deploy/compose/config.yaml index 2aff5f1..a2777b4 100644 --- a/deploy/compose/config.yaml +++ b/deploy/compose/config.yaml @@ -1,43 +1,78 @@ -auto_swap: false # Whether to swap tables automatically -preferred_window: "00:00-23:59" # Preferred window for swapping tables & bulk import validation +#################### +# Required configs # +#################### +# Migration plan source_writer_endpoint: "" -source_reader_endpoint: "" destination_writer_endpoint: "" -destination_reader_endpoint: "" +source_db: "" +source_table: "" +destination_db: "" +destination_table: "" -min_chunk_size: 100000 -max_chunk_count: 200 +auto_swap: false # Whether to swap tables automatically. (Default: false) +preferred_window: "00:00-23:59" # Preferred window for swapping tables & bulk import validation. (Default: "00:00-23:59") -# Worker -min_batch_size: 200 -max_batch_size: 1000 -batch_size_step_size: 200 +# Worker config +min_batch_size: 500 # Starting batch size to use. (Default: 500) +max_batch_size: 3000 # Desired batch size to use. (Default: 3000) +batch_size_step_size: 500 # Step size to increase batch size. (Default: 500) -min_thread_count: 1 -max_thread_count: 8 -thread_count_step_size: 1 +min_thread_count: 1 # Starting thread count to use. (Default: 1) +max_thread_count: 8 # Desired thread count to use. (Default: 8) +thread_count_step_size: 1 # Step size to increase thread count. (Default: 1) -commit_interval_in_seconds: 1 -use_batch_size_multiplier: true +commit_interval_in_seconds: 1 # Time wait after each query executed by worker. (Default: 1) -# Monitor -cpu_soft_threshold: 40 # Soft threshold for CPU usage. 
If the CPU usage exceeds this value, thread count will be decreased into half. -cpu_hard_threshold: 60 # Hard threshold for CPU usage. If the CPU usage exceeds this value, thread count will be decreased to 0. -write_latency_soft_threshold: 30 # Soft threshold for WriteLatency. If the latency exceeds this value, batch size will be decreased into half. -write_latency_hard_threshold: 50 # Hard threshold for WriteLatency. If the latency exceeds this value, batch size will be decreased to 0. +# Validator +bulk_import_validation_batch_size: 10000 # Batch size for bulk import validation (Default: 10000) +apply_dml_events_validation_batch_size: 1000 # Batch size for DML event validation (Default: 1000) +validation_thread_count: 4 # Number of threads to use for validation process (Default: 4) -# EventHandler -eventhandler_thread_count: 4 # Number of threads for EventHandler. Max number of binlog files to read at once. (Max 4 recommended) -eventhandler_thread_timeout_in_seconds: 300 # Timeout for EventHandler thread. If the thread is not finished within this time, it raises exception and restarts EventHandler. +#################### +# Optional configs # +#################### -# EventLoader -pk_set_max_size: 1000000 # Max number of DML PKs to load from DB at once. No more than 2 * pk_set_max_size will be kept in Redis. This is used for memory optimization. -event_batch_duration_in_seconds: 3600 # Timestamp range of DML events to load from DB at once (seconds). +# Migration plan +# sbosc_db: "sbosc" # Database to create sb-osc tables. (Default: "sbosc") +# source_reader_endpoint: ~ # If not provided, source_writer_endpoint will be used. 
(Default: ~) +# source_cluster_id: ~ # If not provided, cluster id will be retrieved from source_writer_endpoint (Default: ~) +# destination_reader_endpoint: ~ # If not provided, destination_writer_endpoint will be used (Default: ~) +# destination_cluster_id: ~ # If not provided, cluster id will be retrieved from destination_writer_endpoint (Default: ~) +# min_chunk_size: 100000 # Minimum chunk size to create. (Default: 100000) +# max_chunk_count: 200 # Maximum number of chunks to create. (Default: 200) +# wait_interval_until_auto_swap_in_seconds: 60 # Interval to wait until auto swap. (Default: 60) +# skip_bulk_import: false # Whether to skip bulk import. (Default: false) +# disable_apply_dml_events: false # Whether to disable applying dml events. (Default: false) +# operation_class: BaseOperation # Operation class to use. (Default: BaseOperation) +# indexes: [] # Indexes to create after bulk import. (Default: []) +# index_created_per_query: 4 # Number of indexes to create per iteration. (Default: 4) +# innodb_ddl_buffer_size: ~ # innodb_ddl_buffer_size for MySQL. (Default: ~) +# innodb_ddl_threads: ~ # innodb_ddl_threads for MySQL. (Default: ~) +# innodb_parallel_read_threads : ~ # innodb_parallel_read_threads for MySQL. (Default: ~) -# Validator -bulk_import_validation_batch_size: 1000000 # Batch size for bulk import validation -apply_dml_events_validation_batch_size: 3000 # Batch size for DML event validation -apply_dml_events_validation_interval_in_seconds: 10 # Interval for DML event validation (seconds) -full_dml_event_validation_interval_in_hours: 1 # Interval for full DML event validation (hours) -validation_thread_count: 4 # Number of threads to use for validation process +# Worker config +# use_batch_size_multiplier: false # Whether to use batch size multiplier. (Default: false) + +# EventHandler config +# eventhandler_thread_count: 4 # Number of threads for EventHandler. Max number of binlog files to read at once. (Default 4. 
Max 4 recommended) +# eventhandler_thread_timeout_in_seconds: 300 # Timeout for EventHandler thread. If the thread is not finished within this time, it raises exception and restarts EventHandler. (Default: 300) +# init_binlog_file: ~ # Initial binlog file to start reading. (Default: ~) +# init_binlog_position: ~ # Initial binlog position to start reading. (Default: ~) + +# Monitor threshold +# cpu_soft_threshold: 40 # Soft threshold for CPU usage. If the CPU usage exceeds this value, thread count will be decreased into half. (Default: 40) +# cpu_hard_threshold: 60 # Hard threshold for CPU usage. If the CPU usage exceeds this value, thread count will be decreased to 0. (Default: 60) +# write_latency_soft_threshold: 30 # Soft threshold for WriteLatency. If the latency exceeds this value, batch size will be decreased into half. (Default: 30) +# write_latency_hard_threshold: 50 # Hard threshold for WriteLatency. If the latency exceeds this value, batch size will be decreased to 0. (Default: 50) + +# Validation config +# apply_dml_events_validation_interval_in_seconds: 10 # Interval for DML event validation (seconds) (Default: 10) +# full_dml_event_validation_interval_in_hours: 0 # Interval for full DML event validation. 0 disables full DML event validation (hours) (Default: 0) + +# EventLoader config +# pk_set_max_size: 100000 # Max number of DML PKs to load from DB at once. No more than 2 * pk_set_max_size will be kept in Redis. This is used for memory optimization. (Default: 100000) +# event_batch_duration_in_seconds: 3600 # Timestamp range of DML events to load from DB at once (seconds). (Default: 3600) + +# Operation class config +# operation_class_config: ~ # Operation class specific configurations. 
(Default: ~) diff --git a/deploy/compose/secret.json b/deploy/compose/secret.json index bf3aceb..e4819ed 100644 --- a/deploy/compose/secret.json +++ b/deploy/compose/secret.json @@ -2,7 +2,7 @@ "username": "root", "password": "", "port": "3306", - "redis_host": "127.0.0.1", + "redis_host": "redis", "redis_password": "", "slack_channel": "", "slack_token": "" diff --git a/src/config/config.py b/src/config/config.py index dc68c98..81d8521 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -73,14 +73,14 @@ class Config: INNODB_PARALLEL_READ_THREADS = None # optional # Worker config - MIN_BATCH_SIZE = 100 - BATCH_SIZE_STEP_SIZE = 100 - MAX_BATCH_SIZE = 10000 - MIN_THREAD_COUNT = 4 - THREAD_COUNT_STEP_SIZE = 4 - MAX_THREAD_COUNT = 64 - COMMIT_INTERVAL_IN_SECONDS = 0.01 - OPTIMAL_VALUE_USE_LIMIT = 10 + MIN_BATCH_SIZE = 1000 + BATCH_SIZE_STEP_SIZE = 500 + MAX_BATCH_SIZE = 3000 + MIN_THREAD_COUNT = 1 + THREAD_COUNT_STEP_SIZE = 1 + MAX_THREAD_COUNT = 8 + COMMIT_INTERVAL_IN_SECONDS = 1 + OPTIMAL_VALUE_USE_LIMIT = 3 USE_BATCH_SIZE_MULTIPLIER = False # EventHandler config @@ -89,24 +89,24 @@ class Config: INIT_BINLOG_FILE: str = None INIT_BINLOG_POSITION: int = None - # Threshold - CPU_SOFT_THRESHOLD = 70 - CPU_HARD_THRESHOLD = 90 - WRITE_LATENCY_SOFT_THRESHOLD = 20 # milliseconds + # Monitor threshold + CPU_SOFT_THRESHOLD = 40 + CPU_HARD_THRESHOLD = 60 + WRITE_LATENCY_SOFT_THRESHOLD = 30 # milliseconds WRITE_LATENCY_HARD_THRESHOLD = 50 # milliseconds - # Validation - BULK_IMPORT_VALIDATION_BATCH_SIZE = 100000 - APPLY_DML_EVENTS_VALIDATION_BATCH_SIZE = 100000 + # Validation config + BULK_IMPORT_VALIDATION_BATCH_SIZE = 10000 + APPLY_DML_EVENTS_VALIDATION_BATCH_SIZE = 1000 VALIDATION_THREAD_COUNT = 4 APPLY_DML_EVENTS_VALIDATION_INTERVAL_IN_SECONDS = 10 - FULL_DML_EVENT_VALIDATION_INTERVAL_IN_HOURS = 1 + FULL_DML_EVENT_VALIDATION_INTERVAL_IN_HOURS = 0 - # DML event loading - PK_SET_MAX_SIZE = 1000000 + # EventLoader config + PK_SET_MAX_SIZE = 100000 
EVENT_BATCH_DURATION_IN_SECONDS = 3600 - # Operation Class specific config + # OperationClass config OPERATION_CLASS_CONFIG = {} @property From 50fb91b8f7548b06b78ff53b973b4583b07bfca8 Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Thu, 5 Sep 2024 17:40:59 +0900 Subject: [PATCH 08/11] add troubleshooting --- deploy/README.md | 3 +++ doc/troubleshooting.md | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/deploy/README.md b/deploy/README.md index 79993f3..a96768f 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -10,3 +10,6 @@ You can build Docker image using Dockerfile in the root directory. ```bash docker build -t sb-osc . ``` + +### Troubleshooting +Issues and solutions that may occur when using SB-OSC can be found in [troubleshooting.md](../doc/troubleshooting.md). diff --git a/doc/troubleshooting.md b/doc/troubleshooting.md index 75a4b3a..57046dc 100644 --- a/doc/troubleshooting.md +++ b/doc/troubleshooting.md @@ -2,6 +2,50 @@ This sections provides list of possible issues and solutions that may occur when using SB-OSC. +### Redis errors +**AuthenticationError** +``` +redis.exceptions.AuthenticationError: AUTH called without any password configured for the default user. Are you sure your configuration is correct? +``` +This error occurs when `redis_password` in `secret.json` is set and `requirepass` in `redis.conf` is not set or empty. Make sure to set the same password in both files. + +``` +redis.exceptions.AuthenticationError: invalid username-password pair or user is disabled. +``` +This error occurs when `redis_password` in `secret.json` is set and `requirepass` in `redis.conf` is set but the password is incorrect. + +**ConnectionError** +You can encounter `redis.exceptions.ConnectionError` when redis host is not set properly. Make sure to set the correct host in `secret.json`. 
+ +### no attribute 'logger' +``` +AttributeError: 'WorkerManager' object has no attribute 'logger' +AttributeError: 'MetricMonitor' object has no attribute 'logger' +AttributeError: 'EventHandler' object has no attribute 'logger' +``` +This error can occur in two situations. + +SB-OSC starts its process from the controller, which creates the required tables and sets the required redis keys. Other processes wait and restart until it finishes. The above error can occur during the controller's initialization process. This will be resolved shortly after the controller finishes its initialization process. + +However, if there were any problems during the controller's initialization process, especially when setting redis keys, it can't be resolved automatically. In this case, you need to delete the row in the `migration_plan` table so that you can start the process with a new `migration_id`. The following error log from the controller can indicate this problem. +``` + self.source_column_list: list = metadata.source_columns.split(',') + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + AttributeError: 'NoneType' object has no attribute 'split' +``` + +### TypeError on self.log_file = status['File'] +EventHandler uses `SHOW MASTER STATUS` to get the current binlog file. If `SHOW MASTER STATUS` returns 0 rows, this error will occur. Please double-check if binlog is enabled on the source database. + +### ZeroDivisionError: integer division or modulo by zero +``` + chunk_size = max_id // chunk_count + ~~~~~~~^^~~~~~~~~~~~~ + ZeroDivisionError: integer division or modulo by zero +``` +`chunk_count` is calculated from `min(max_id // min_chunk_size, max_chunk_count)`. +If `min_chunk_size` is larger than `max_id`, `chunk_count` will be 0. In this case, you need to decrease `min_chunk_size`. Restart components after changing the value to apply the change. + ### apply_dml_events_validation_batch_size ~~When setting `apply_dml_events_validation_batch_size` there are two factors to consider.
Since the binlog resolution is in seconds, if the number of DML events in a second is greater than the batch size, the validation process can hang indefinitely. In this case, it is recommended to increase the batch size.~~ -> This issue was fixed by [#10](https://github.com/sendbird/sb-osc/pull/10) From bc5c8ea1379836c8414e0e1993b68aa369696e15 Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Mon, 23 Sep 2024 17:59:03 +0900 Subject: [PATCH 09/11] make reader endpoints required --- deploy/compose/config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy/compose/config.yaml b/deploy/compose/config.yaml index a2777b4..6237e9c 100644 --- a/deploy/compose/config.yaml +++ b/deploy/compose/config.yaml @@ -3,8 +3,10 @@ #################### # Migration plan -source_writer_endpoint: "" -destination_writer_endpoint: "" +source_writer_endpoint: "" # If source_cluster_id is not provided, this must be cluster writer endpoint. +source_reader_endpoint: "" +destination_writer_endpoint: "" # If destination_cluster_id is not provided, this must be cluster writer endpoint. +destination_reader_endpoint: "" source_db: "" source_table: "" destination_db: "" @@ -35,9 +37,7 @@ validation_thread_count: 4 # Number of threads to use for validation process (D # Migration plan # sbosc_db: "sbosc" # Database to create sb-osc tables. (Default: "sbosc") -# source_reader_endpoint: ~ # If not provided, source_writer_endpoint will be used. (Default: ~) # source_cluster_id: ~ # If not provided, cluster id will be retrieved from source_writer_endpoint (Default: ~) -# destination_reader_endpoint: ~ # If not provided, destination_writer_endpoint will be used (Default: ~) # destination_cluster_id: ~ # If not provided, cluster id will be retrieved from destination_writer_endpoint (Default: ~) # min_chunk_size: 100000 # Minimum chunk size to create. (Default: 100000) # max_chunk_count: 200 # Maximum number of chunks to create. 
(Default: 200) From 52d9e908e1d6d88881c313b40035f8de4362b2df Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Wed, 30 Oct 2024 15:57:57 +0900 Subject: [PATCH 10/11] add usage.md --- README.md | 2 +- doc/usage.md | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 doc/usage.md diff --git a/README.md b/README.md index e149114..49d1d03 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ processing its own binlog events. - `binlog_format` set to `ROW` - `binlog-ignore-db` set to `sbosc` (Recommended) -Detailed requirements and setup instructions can be found in the [usage guide](deploy/README.md). +Detailed requirements and setup instructions can be found in the [deployment guide](deploy/README.md). ## Performance diff --git a/doc/usage.md b/doc/usage.md new file mode 100644 index 0000000..5d49403 --- /dev/null +++ b/doc/usage.md @@ -0,0 +1,39 @@ +# Usage Guide +This guide will give you instructions on operating SB-OSC during runtime. + +### Changing Configurations +When you need to change configurations, you can edit `config.yaml` and `secret.json` files. After editing, you need to restart the containers to apply the changes. +Most of the values in `config.yaml` can be modified after starting the process. +However, there are some configs that need to be set before starting the process. 
+ +The following configurations should be set before starting the process: + +**Required fields** +- Values that determine `migration_id` + - `source_cluster_id` + - `source_writer_endpoint` (if `source_cluster_id` is not provided) + - `destination_writer_endpoint` (if `destination_cluster_id` is not provided) + - `source_db` + - `source_table` + - `destination_db` + - `destination_table` + +**Optional fields** +- `sbosc_db`: SB-OSC database name +- Starting binlog file and position + - `init_binlog_file` + - `init_binlog_position` +- Bulk import chunk related config + - `max_chunk_count` + - `min_chunk_size` +- Index related config + - `indexes` + - `index_created_per_query` + +If you want to change these values, you have to modify the corresponding data sources (e.g. database, Redis) manually. +For `migration_id`-related values, you need to delete the row in the `migration_plan` table, and for index-related values, you need to modify the `index_creation_status` table. + +### Pausing and Resuming +SB-OSC is resumable at any stage of the schema migration process. It saves the current state of each stage to the database and Redis, allowing users to pause and resume the process at any time, as long as binlog retention is sufficient. + +To pause the process, you just have to stop all the containers. To resume the process, you need to start the containers again. All checkpoints are saved in the database and Redis, so the process will resume from the last checkpoint. From 30460ed008dc2bf082fdf21aec842bdc76243f98 Mon Sep 17 00:00:00 2001 From: Jimmy Kim Date: Wed, 30 Oct 2024 16:10:12 +0900 Subject: [PATCH 11/11] add description for requirepass --- deploy/compose/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deploy/compose/README.md b/deploy/compose/README.md index 3fec4d4..5984cd7 100644 --- a/deploy/compose/README.md +++ b/deploy/compose/README.md @@ -46,7 +46,8 @@ This file contains the credentials for the database, redis, and slack.
You can f ### `redis.conf` This file contains the configuration for the Redis server. You can find the template in [redis.conf](redis.conf). -- `requirepass ""`: Match the `redis_password` set in `secret.json` +- `requirepass ""`: Match the `redis_password` set in `secret.json`. + - If `requirepass ""` is set, this means that the Redis server does not require a password. Fill in the password between the quotes to set a password. - `appendonly yes`: Enable AOF persistence - `save ""`: Disable RDB persistence