From 5b597be4861d1aa60ba14710fd284a6ce5b71b18 Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Thu, 10 Oct 2024 16:24:03 +0200 Subject: [PATCH 01/10] Retry requests --- kbcstorage/base.py | 9 ++-- kbcstorage/retry_requests.py | 31 +++++++++++++ tests/mocks/test_retry.py | 88 ++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 kbcstorage/retry_requests.py create mode 100644 tests/mocks/test_retry.py diff --git a/kbcstorage/base.py b/kbcstorage/base.py index 5b12f53..6b20510 100644 --- a/kbcstorage/base.py +++ b/kbcstorage/base.py @@ -9,6 +9,7 @@ .. _Storage API documentation: http://docs.keboola.apiary.io/ """ +from . import retry_requests import requests @@ -66,7 +67,7 @@ def _get_raw(self, url, params=None, **kwargs): headers = kwargs.pop('headers', {}) headers.update(self._auth_header) - r = requests.get(url, params, headers=headers, **kwargs) + r = retry_requests.get(url, params=params, headers=headers, **kwargs) try: r.raise_for_status() except requests.HTTPError: @@ -111,7 +112,7 @@ def _post(self, *args, **kwargs): """ headers = kwargs.pop('headers', {}) headers.update(self._auth_header) - r = requests.post(headers=headers, *args, **kwargs) + r = retry_requests.post(headers=headers, *args, **kwargs) try: r.raise_for_status() except requests.HTTPError: @@ -137,7 +138,7 @@ def _put(self, *args, **kwargs): """ headers = kwargs.pop('headers', {}) headers.update(self._auth_header) - r = requests.put(headers=headers, *args, **kwargs) + r = retry_requests.put(headers=headers, *args, **kwargs) try: r.raise_for_status() except requests.HTTPError: @@ -163,7 +164,7 @@ def _delete(self, *args, **kwargs): """ headers = kwargs.pop('headers', {}) headers.update(self._auth_header) - r = requests.delete(headers=headers, *args, **kwargs) + r = retry_requests.delete(headers=headers, *args, **kwargs) try: r.raise_for_status() except requests.HTTPError: diff --git a/kbcstorage/retry_requests.py b/kbcstorage/retry_requests.py new file mode 100644 index 0000000..264e02a --- /dev/null +++ b/kbcstorage/retry_requests.py @@ -0,0 +1,31 @@ +import time +import requests + +MAX_RETRIES = 5 +BACKOFF_FACTOR = 1.0 +RETRY_STATUS_CODES = {500, 502, 503, 504} + + +def _get_backoff_time(retry_count): + return BACKOFF_FACTOR * (2 ** retry_count) + +def _retry_request(request_func, url, *args, **kwargs): + response = request_func(url, *args, **kwargs) + for retry_count in range(1, MAX_RETRIES): + if response.status_code not in RETRY_STATUS_CODES: + return response + time.sleep(_get_backoff_time(retry_count)) + response = request_func(url, **kwargs) + return response + +def get(url, *args, **kwargs): + return _retry_request(requests.get, url, *args, **kwargs) + +def post(url, *args, **kwargs): + return _retry_request(requests.post, url, *args, **kwargs) + +def put(url, *args, **kwargs): + return _retry_request(requests.put, url, *args, **kwargs) + +def delete(url, *args, **kwargs): + return _retry_request(requests.delete, url, *args, **kwargs) \ No newline at end of file diff --git a/tests/mocks/test_retry.py b/tests/mocks/test_retry.py new file mode 100644 index 0000000..01e8bc1 --- /dev/null +++ b/tests/mocks/test_retry.py @@ -0,0 +1,88 @@ +""" +Test basic functionality of the Tables endpoint +""" +import time +import unittest +from unittest.mock import patch +from urllib.error import HTTPError + +import requests +import responses + +from kbcstorage.tables import Tables + +from .table_responses import list_response + + +class TestRequestRetry(unittest.TestCase): + """ + Test that requests are retried. + """ + def setUp(self): + token = 'dummy_token' + base_url = 'https://connection.keboola.com/' + self.tables = Tables(base_url, token) + + @responses.activate + @patch('time.sleep', return_value=None) + def test_ok(self, sleep_mock): + """ + Retry will try at least 5 times. + """ + for _ in range(4): + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/tables', + json=list_response, + status=502 + ) + ) + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/tables', + json=list_response, + ) + ) + tables_list = self.tables.list() + assert isinstance(tables_list, list) + + @responses.activate + @patch('time.sleep', return_value=None) + def test_raises_error_many_tries(self, sleep_mock): + """ + Retry will fail if it gets enough of error responses. + """ + for _ in range(20): + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/tables', + json=list_response, + status=502 + ) + ) + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/tables', + json=list_response, + ) + ) + with self.assertRaises(requests.exceptions.HTTPError): + self.tables.list() + + @responses.activate + @patch('time.sleep', return_value=None) + def test_raises_error_on_4xx(self, sleep_mock): + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/tables', + json=list_response, + status=401 + ) + ) + with self.assertRaises(requests.exceptions.HTTPError): + self.tables.list() From 2fa6f42874c59d2941afc8fd2ecc7b445068709a Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Mon, 14 Oct 2024 13:06:54 +0200 Subject: [PATCH 02/10] adjust logic --- kbcstorage/retry_requests.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kbcstorage/retry_requests.py b/kbcstorage/retry_requests.py index 264e02a..b549671 100644 --- a/kbcstorage/retry_requests.py +++ b/kbcstorage/retry_requests.py @@ -3,16 +3,14 @@ MAX_RETRIES = 5 BACKOFF_FACTOR = 1.0 -RETRY_STATUS_CODES = {500, 502, 503, 504} - def _get_backoff_time(retry_count): return BACKOFF_FACTOR * (2 ** retry_count) def _retry_request(request_func, url, *args, **kwargs): response = request_func(url, *args, **kwargs) - for retry_count in range(1, MAX_RETRIES): - if response.status_code not in RETRY_STATUS_CODES: + for retry_count in range(MAX_RETRIES - 1): + if response.status_code == 501 or response.status_code < 500: return response time.sleep(_get_backoff_time(retry_count)) response = request_func(url, **kwargs) From 2d3e7161c8fef9b9d768583f8e58e6ee91fa8894 Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Mon, 14 Oct 2024 13:10:04 +0200 Subject: [PATCH 03/10] adjust max retries --- kbcstorage/retry_requests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kbcstorage/retry_requests.py b/kbcstorage/retry_requests.py index b549671..604c890 100644 --- a/kbcstorage/retry_requests.py +++ b/kbcstorage/retry_requests.py @@ -1,7 +1,7 @@ import time import requests -MAX_RETRIES = 5 +MAX_RETRIES = 11 BACKOFF_FACTOR = 1.0 def _get_backoff_time(retry_count): From ef0452b7b99d0e977498743300f5cb7c57b56ea5 Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Tue, 15 Oct 2024 15:45:58 +0200 Subject: [PATCH 04/10] docker-compose -> docker compose --- .github/workflows/push.yml | 10 +++++----- README.md | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index f129278..f29b836 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -35,10 +35,10 @@ jobs: run: docker login --username "$DOCKERHUB_USER" --password "$DOCKERHUB_TOKEN" - name: Build image - run: docker-compose build ci + run: docker compose build ci - name: Check image - run: docker-compose run --rm ci -m flake8 + run: docker compose run --rm ci -m flake8 - name: Set image tag id: tag @@ -69,7 +69,7 @@ jobs: - name: Run Tests run: | - docker-compose run --rm -e KBC_TEST_TOKEN=$KBC_TEST_TOKEN -e KBC_TEST_API_URL=$KBC_TEST_API_URL -e SKIP_ABS_TESTS=1 ci -m unittest --verbose + docker compose run --rm -e KBC_TEST_TOKEN=$KBC_TEST_TOKEN -e KBC_TEST_API_URL=$KBC_TEST_API_URL -e SKIP_ABS_TESTS=1 ci -m unittest --verbose tests_azure: name: Run tests (Azure) @@ -86,7 +86,7 @@ jobs: - name: Run Tests run: | - docker-compose run --rm -e KBC_TEST_TOKEN=$KBC_AZ_TEST_TOKEN -e KBC_TEST_API_URL=$KBC_AZ_TEST_API_URL -e SKIP_ABS_TESTS=1 ci -m unittest --verbose + docker compose run --rm -e KBC_TEST_TOKEN=$KBC_AZ_TEST_TOKEN -e KBC_TEST_API_URL=$KBC_AZ_TEST_API_URL -e SKIP_ABS_TESTS=1 ci -m unittest --verbose tests_gcp: name: Run tests (GCP) @@ -103,7 +103,7 @@ jobs: - name: Run Tests run: | - docker-compose run --rm -e KBC_TEST_TOKEN=$KBC_GCP_TEST_TOKEN -e KBC_TEST_API_URL=$KBC_GCP_TEST_API_URL -e SKIP_ABS_TESTS=1 ci -m unittest --verbose + docker compose run --rm -e KBC_TEST_TOKEN=$KBC_GCP_TEST_TOKEN -e KBC_TEST_API_URL=$KBC_GCP_TEST_API_URL -e SKIP_ABS_TESTS=1 ci -m unittest --verbose deploy_to_pypi: needs: diff --git a/README.md b/README.md index 1421d1c..969c176 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ tables.detail('in.c-demo.some-table') Create `.env` file according to the `.env.template` file and run the tests with: ```bash -$ docker-compose run --rm -e KBC_TEST_TOKEN -e KBC_TEST_API_URL sapi-python-client -m unittest discover +$ docker compose run --rm -e KBC_TEST_TOKEN -e KBC_TEST_API_URL sapi-python-client -m unittest discover ``` ## Contribution Guide From 7da4081758b53afef22e250ced2463a663d654ae Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Tue, 15 Oct 2024 15:48:47 +0200 Subject: [PATCH 05/10] fix whitespace --- kbcstorage/retry_requests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kbcstorage/retry_requests.py b/kbcstorage/retry_requests.py index 604c890..423230a 100644 --- a/kbcstorage/retry_requests.py +++ b/kbcstorage/retry_requests.py @@ -4,9 +4,11 @@ MAX_RETRIES = 11 BACKOFF_FACTOR = 1.0 + def _get_backoff_time(retry_count): return BACKOFF_FACTOR * (2 ** retry_count) + def _retry_request(request_func, url, *args, **kwargs): response = request_func(url, *args, **kwargs) for retry_count in range(MAX_RETRIES - 1): @@ -16,14 +18,18 @@ def _retry_request(request_func, url, *args, **kwargs): response = request_func(url, **kwargs) return response + def get(url, *args, **kwargs): return _retry_request(requests.get, url, *args, **kwargs) + def post(url, *args, **kwargs): return _retry_request(requests.post, url, *args, **kwargs) + def put(url, *args, **kwargs): return _retry_request(requests.put, url, *args, **kwargs) + def delete(url, *args, **kwargs): - return _retry_request(requests.delete, url, *args, **kwargs) \ No newline at end of file + return _retry_request(requests.delete, url, *args, **kwargs) From 46eed86d33c61ff7cf163c7bfd9eeff8e9ccbcac Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Tue, 15 Oct 2024 15:53:15 +0200 Subject: [PATCH 06/10] fix imports --- tests/mocks/test_retry.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/mocks/test_retry.py b/tests/mocks/test_retry.py index 01e8bc1..239ab77 100644 --- a/tests/mocks/test_retry.py +++ b/tests/mocks/test_retry.py @@ -1,10 +1,8 @@ """ Test basic functionality of the Tables endpoint """ -import time import unittest from unittest.mock import patch -from urllib.error import HTTPError import requests import responses From dc1369a7b37e5be1d695b79ff1438aa914936305 Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Tue, 15 Oct 2024 15:53:57 +0200 Subject: [PATCH 07/10] asd --- tests/mocks/test_retry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mocks/test_retry.py b/tests/mocks/test_retry.py index 239ab77..44a6355 100644 --- a/tests/mocks/test_retry.py +++ b/tests/mocks/test_retry.py @@ -1,5 +1,5 @@ """ -Test basic functionality of the Tables endpoint +Test that requests are retried if the server is not available. """ import unittest from unittest.mock import patch From 207ffc4d038a71d33ddd07e47fba222fb97a3f4b Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Tue, 15 Oct 2024 16:46:51 +0200 Subject: [PATCH 08/10] make max number of retries a param --- kbcstorage/base.py | 13 ++++++------ kbcstorage/retry_requests.py | 40 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/kbcstorage/base.py b/kbcstorage/base.py index 6b20510..a8d481d 100644 --- a/kbcstorage/base.py +++ b/kbcstorage/base.py @@ -9,7 +9,7 @@ .. _Storage API documentation: http://docs.keboola.apiary.io/ """ -from . import retry_requests +from kbcstorage.retry_requests import MAX_RETRIES_DEFAULT, RetryRequests import requests @@ -22,7 +22,7 @@ class Endpoint: base_url (str): The base URL for this endpoint. token (str): A key for the Storage API. """ - def __init__(self, root_url, path_component, token): + def __init__(self, root_url, path_component, token, max_requests_retries=MAX_RETRIES_DEFAULT): """ Create an endpoint. @@ -45,6 +45,7 @@ def __init__(self, root_url, path_component, token): self._auth_header = {'X-StorageApi-Token': self.token, 'Accept-Encoding': 'gzip', 'User-Agent': 'Keboola Storage API Python Client'} + self.requests = RetryRequests(max_requests_retries) def _get_raw(self, url, params=None, **kwargs): """ @@ -67,7 +68,7 @@ def _get_raw(self, url, params=None, **kwargs): headers = kwargs.pop('headers', {}) headers.update(self._auth_header) - r = retry_requests.get(url, params=params, headers=headers, **kwargs) + r = self.requests.get(url, params=params, headers=headers, **kwargs) try: r.raise_for_status() except requests.HTTPError: @@ -112,7 +113,7 @@ def _post(self, *args, **kwargs): """ headers = kwargs.pop('headers', {}) headers.update(self._auth_header) - r = retry_requests.post(headers=headers, *args, **kwargs) + r = self.requests.post(headers=headers, *args, **kwargs) try: r.raise_for_status() except requests.HTTPError: @@ -138,7 +139,7 @@ def _put(self, *args, **kwargs): """ headers = kwargs.pop('headers', {}) headers.update(self._auth_header) - r = retry_requests.put(headers=headers, *args, **kwargs) + r = self.requests.put(headers=headers, *args, **kwargs) try: r.raise_for_status() except requests.HTTPError: @@ -164,7 +165,7 @@ def _delete(self, *args, **kwargs): """ headers = kwargs.pop('headers', {}) headers.update(self._auth_header) - r = retry_requests.delete(headers=headers, *args, **kwargs) + r = self.requests.delete(headers=headers, *args, **kwargs) try: r.raise_for_status() except requests.HTTPError: diff --git a/kbcstorage/retry_requests.py b/kbcstorage/retry_requests.py index 423230a..fba0920 100644 --- a/kbcstorage/retry_requests.py +++ b/kbcstorage/retry_requests.py @@ -1,7 +1,7 @@ import time import requests -MAX_RETRIES = 11 +MAX_RETRIES_DEFAULT = 11 BACKOFF_FACTOR = 1.0 @@ -9,27 +9,27 @@ def _get_backoff_time(retry_count): return BACKOFF_FACTOR * (2 ** retry_count) -def _retry_request(request_func, url, *args, **kwargs): - response = request_func(url, *args, **kwargs) - for retry_count in range(MAX_RETRIES - 1): - if response.status_code == 501 or response.status_code < 500: - return response - time.sleep(_get_backoff_time(retry_count)) - response = request_func(url, **kwargs) - return response +class RetryRequests: + def __init__(self, max_requests_retries=MAX_RETRIES_DEFAULT) -> None: + self.max_retries = max_requests_retries + def _retry_request(self, request_func, url, *args, **kwargs): + response = request_func(url, *args, **kwargs) + for retry_count in range(self.max_retries - 1): + if response.status_code == 501 or response.status_code < 500: + return response + time.sleep(_get_backoff_time(retry_count)) + response = request_func(url, **kwargs) + return response -def get(url, *args, **kwargs): - return _retry_request(requests.get, url, *args, **kwargs) + def get(self, url, *args, **kwargs): + return self._retry_request(requests.get, url, *args, **kwargs) + def post(self, url, *args, **kwargs): + return self._retry_request(requests.post, url, *args, **kwargs) -def post(url, *args, **kwargs): - return _retry_request(requests.post, url, *args, **kwargs) + def put(self, url, *args, **kwargs): + return self._retry_request(requests.put, url, *args, **kwargs) - -def put(url, *args, **kwargs): - return _retry_request(requests.put, url, *args, **kwargs) - - -def delete(url, *args, **kwargs): - return _retry_request(requests.delete, url, *args, **kwargs) + def delete(self, url, *args, **kwargs): + return self._retry_request(requests.delete, url, *args, **kwargs) From 10003c3fe01bc4cfc0ad3bd088c2b9dabb683cb0 Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Thu, 17 Oct 2024 13:58:57 +0200 Subject: [PATCH 09/10] Test also changing the max retry attemps. --- tests/mocks/test_retry.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/mocks/test_retry.py b/tests/mocks/test_retry.py index 44a6355..2c1246c 100644 --- a/tests/mocks/test_retry.py +++ b/tests/mocks/test_retry.py @@ -7,11 +7,20 @@ import requests import responses +from kbcstorage.base import Endpoint from kbcstorage.tables import Tables from .table_responses import list_response +class NoRetriesEndpoint(Endpoint): + def __init__(self, root_url, token): + super().__init__(root_url, 'no-retries', token, max_requests_retries=0) + + def list(self): + return self._get(self.base_url) + + class TestRequestRetry(unittest.TestCase): """ Test that requests are retried. @@ -20,6 +29,7 @@ def setUp(self): token = 'dummy_token' base_url = 'https://connection.keboola.com/' self.tables = Tables(base_url, token) + self.no_retries = NoRetriesEndpoint(base_url, token) @responses.activate @patch('time.sleep', return_value=None) @@ -84,3 +94,27 @@ def test_raises_error_on_4xx(self, sleep_mock): ) with self.assertRaises(requests.exceptions.HTTPError): self.tables.list() + + @responses.activate + @patch('time.sleep', return_value=None) + def test_no_retries(self, sleep_mock): + """ + Request wont be retried for endpoints that configured it. + """ + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/no-retries', + json=list_response, + status=502 + ) + ) + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/no-retries', + json=list_response, + ) + ) + with self.assertRaises(requests.exceptions.HTTPError): + self.no_retries.list() From df86260399b643a6a9be7fa4660d3324b2efcea7 Mon Sep 17 00:00:00 2001 From: Tomas Kukan Date: Thu, 17 Oct 2024 14:15:25 +0200 Subject: [PATCH 10/10] improve tests --- tests/mocks/test_retry.py | 61 +++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/tests/mocks/test_retry.py b/tests/mocks/test_retry.py index 2c1246c..b6b6082 100644 --- a/tests/mocks/test_retry.py +++ b/tests/mocks/test_retry.py @@ -13,9 +13,9 @@ from .table_responses import list_response -class NoRetriesEndpoint(Endpoint): - def __init__(self, root_url, token): - super().__init__(root_url, 'no-retries', token, max_requests_retries=0) +class RetriesEndpoint(Endpoint): + def __init__(self, root_url, token, retries_count): + super().__init__(root_url, 'retries', token, max_requests_retries=retries_count) def list(self): return self._get(self.base_url) @@ -29,7 +29,8 @@ def setUp(self): token = 'dummy_token' base_url = 'https://connection.keboola.com/' self.tables = Tables(base_url, token) - self.no_retries = NoRetriesEndpoint(base_url, token) + self.no_retries = RetriesEndpoint(base_url, token, 0) + self.two_retries = RetriesEndpoint(base_url, token, 2) @responses.activate @patch('time.sleep', return_value=None) @@ -104,7 +105,7 @@ def test_no_retries(self, sleep_mock): responses.add( responses.Response( method='GET', - url='https://connection.keboola.com/v2/storage/no-retries', + url='https://connection.keboola.com/v2/storage/retries', json=list_response, status=502 ) @@ -112,9 +113,57 @@ def test_no_retries(self, sleep_mock): responses.add( responses.Response( method='GET', - url='https://connection.keboola.com/v2/storage/no-retries', + url='https://connection.keboola.com/v2/storage/retries', json=list_response, ) ) with self.assertRaises(requests.exceptions.HTTPError): self.no_retries.list() + + @responses.activate + @patch('time.sleep', return_value=None) + def test_two_retries_fail(self, sleep_mock): + """ + Request wont be retried for endpoints that configured it. + """ + for i in range(3): + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/retries', + json=list_response, + status=502 + ) + ) + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/retries', + json=list_response, + ) + ) + with self.assertRaises(requests.exceptions.HTTPError): + self.two_retries.list() + + @responses.activate + @patch('time.sleep', return_value=None) + def test_two_retries_ok(self, sleep_mock): + """ + Request wont be retried for endpoints that configured it. + """ + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/retries', + json=list_response, + status=502 + ) + ) + responses.add( + responses.Response( + method='GET', + url='https://connection.keboola.com/v2/storage/retries', + json=list_response, + ) + ) + assert isinstance(self.two_retries.list(), list)