From e9127df129dd0f141242dc7d0da9e631cf8f4652 Mon Sep 17 00:00:00 2001
From: Pankaj Koti
Date: Wed, 24 Jan 2024 22:38:16 +0530
Subject: [PATCH] Introduce sleep in tests to avoid bigquery rate limits
 (#2110)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have been unable to get the integration test
`test_aql_replace_existing_table` to pass in our CI environment. The
failures stem from `Google BigQuery` imposing rate limits on table
operations, specifically restricting them to `5` per `10` seconds per
table. As a consequence, our CI pipeline fails, blocking the release of
version `1.18.0`.

The rate limits are documented here:
https://cloud.google.com/bigquery/quotas#standard_tables

Initially, I considered skipping this test in CI. However, adding a
`10s` sleep before each table operation in the problematic `BigQuery`
test case avoids the rate limit error and yields a fully successful CI
build.

I will cherry-pick this PR into the release branch.
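
For reference, the same pause could be factored into a reusable pytest
fixture instead of being inlined in each test. Below is a minimal
sketch, assuming the same parametrization ids this test uses; the
fixture name `bigquery_rate_limit_pause` is hypothetical and not part
of this patch:

    import time

    import pytest


    @pytest.fixture
    def bigquery_rate_limit_pause(request):
        """Return a callable that sleeps 10s only for the bigquery case.

        BigQuery allows at most 5 table operations per 10 seconds per
        table, so pausing before each load keeps back-to-back table
        writes under the quota.
        """
        # request.node.callspec only exists for parametrized tests.
        callspec = getattr(request.node, "callspec", None)
        is_bigquery = callspec is not None and callspec.id == "bigquery"

        def pause():
            if is_bigquery:
                time.sleep(10)

        return pause

A test would then call `bigquery_rate_limit_pause()` before each
`load_file` call instead of repeating the `if test_id == "bigquery"`
check inline.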
---
 python-sdk/src/astro/sql/operators/raw_sql.py   |  2 +-
 python-sdk/src/astro/sql/operators/transform.py |  2 +-
 .../sql/operators/test_load_file.py             | 16 +++++++++++++++-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/python-sdk/src/astro/sql/operators/raw_sql.py b/python-sdk/src/astro/sql/operators/raw_sql.py
index aeb326f91..48b9dc21a 100644
--- a/python-sdk/src/astro/sql/operators/raw_sql.py
+++ b/python-sdk/src/astro/sql/operators/raw_sql.py
@@ -6,7 +6,7 @@
 
 try:
     from airflow.decorators.base import TaskDecorator
-except ImportError:
+except ImportError:  # pragma: no cover
     from airflow.decorators import _TaskDecorator as TaskDecorator  # type: ignore[attr-defined]
 
 import airflow
diff --git a/python-sdk/src/astro/sql/operators/transform.py b/python-sdk/src/astro/sql/operators/transform.py
index 0d9cff33c..a2d757653 100644
--- a/python-sdk/src/astro/sql/operators/transform.py
+++ b/python-sdk/src/astro/sql/operators/transform.py
@@ -5,7 +5,7 @@
 
 try:
     from airflow.decorators.base import TaskDecorator
-except ImportError:
+except ImportError:  # pragma: no cover
     from airflow.decorators import _TaskDecorator as TaskDecorator  # type: ignore[attr-defined]
 
 from airflow.decorators.base import get_unique_task_id, task_decorator_factory
diff --git a/python-sdk/tests_integration/sql/operators/test_load_file.py b/python-sdk/tests_integration/sql/operators/test_load_file.py
index 8ffaf6d15..b7314581a 100644
--- a/python-sdk/tests_integration/sql/operators/test_load_file.py
+++ b/python-sdk/tests_integration/sql/operators/test_load_file.py
@@ -207,12 +207,26 @@ def test_aql_load_remote_file_to_dbs(sample_dag, database_table_fixture, remote_
     indirect=True,
     ids=["snowflake", "bigquery", "postgresql", "sqlite", "redshift", "mssql", "mysql", "duckdb"],
 )
-def test_aql_replace_existing_table(sample_dag, database_table_fixture):
+def test_aql_replace_existing_table(sample_dag, database_table_fixture, request):
+    import time
+
+    test_id = request.node.callspec.id
+
     db, test_table = database_table_fixture
     data_path_1 = str(CWD) + "/../../data/homes.csv"
     data_path_2 = str(CWD) + "/../../data/homes2.csv"
     with sample_dag:
+        # BigQuery rate limits the number of table operations per 10s to 5 operations per table.
+        # See more here: https://cloud.google.com/bigquery/quotas#standard_tables
+        # Hence, introduce a sleep for 10s to avoid rate limit errors.
+        if test_id == "bigquery":
+            time.sleep(10)
+
         task_1 = load_file(input_file=File(data_path_1), output_table=test_table)
+
+        if test_id == "bigquery":
+            time.sleep(10)
+
         task_2 = load_file(input_file=File(data_path_2), output_table=test_table)
         task_1 >> task_2  # skipcq: PYL-W0104
     test_utils.run_dag(sample_dag)