From 2e65e014b7cc06bd891b07884fb80a8a23524e61 Mon Sep 17 00:00:00 2001 From: Rob Davis Date: Tue, 3 Sep 2024 16:10:39 +0100 Subject: [PATCH] add retries to get_airfoil --- setup.cfg | 1 + tests/plugins/core/models/helpers.py | 16 ++++++++++++++++ tests/plugins/domain_adaptation/da_helpers.py | 14 ++++++++++++++ tests/plugins/generic/generic_helpers.py | 14 ++++++++++++++ tests/plugins/privacy/fhelpers.py | 14 ++++++++++++++ tests/utils/test_compression.py | 16 ++++++++++++++++ 6 files changed, 75 insertions(+) diff --git a/setup.cfg b/setup.cfg index 373ddf35..62c88501 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,6 +44,7 @@ install_requires = decaf-synthetic-data>=0.1.6 optuna>=3.1 shap + tenacity tqdm loguru pydantic<2.0 diff --git a/tests/plugins/core/models/helpers.py b/tests/plugins/core/models/helpers.py index 9e0c6b2e..35630af7 100644 --- a/tests/plugins/core/models/helpers.py +++ b/tests/plugins/core/models/helpers.py @@ -1,8 +1,24 @@ +# stdlib +import urllib.error + # third party import pandas as pd +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed +@retry( + stop=stop_after_attempt(5), # Stop after 5 attempts in total (initial call + up to 4 retries) + wait=wait_fixed(2), # Wait 2 seconds between retries + retry=retry_if_exception_type(urllib.error.HTTPError), # Retry only on HTTPError; other exceptions propagate immediately +) def get_airfoil_dataset() -> pd.DataFrame: + """ + Downloads the Airfoil Self-Noise dataset and returns it as a DataFrame. + + Returns: + pd.DataFrame: The Airfoil Self-Noise dataset. 
+ """ + # Read the dataset from the URL df = pd.read_csv( "https://archive.ics.uci.edu/static/public/291/airfoil+self+noise.zip", sep="\t", diff --git a/tests/plugins/domain_adaptation/da_helpers.py b/tests/plugins/domain_adaptation/da_helpers.py index c3f0e05d..5e0998f1 100644 --- a/tests/plugins/domain_adaptation/da_helpers.py +++ b/tests/plugins/domain_adaptation/da_helpers.py @@ -1,8 +1,10 @@ # stdlib +import urllib.error from typing import Dict, List, Type # third party import pandas as pd +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed # synthcity absolute from synthcity.plugins import Plugin, Plugins @@ -23,7 +25,19 @@ def from_serde() -> Plugin: return [from_api(), from_module(), from_serde()] +@retry( + stop=stop_after_attempt(5), # Stop after 5 attempts in total (initial call + up to 4 retries) + wait=wait_fixed(2), # Wait 2 seconds between retries + retry=retry_if_exception_type(urllib.error.HTTPError), # Retry only on HTTPError; other exceptions propagate immediately +) def get_airfoil_dataset() -> pd.DataFrame: + """ + Downloads the Airfoil Self-Noise dataset and returns it as a DataFrame. + + Returns: + pd.DataFrame: The Airfoil Self-Noise dataset. 
+ """ + # Read the dataset from the URL df = pd.read_csv( "https://archive.ics.uci.edu/static/public/291/airfoil+self+noise.zip", sep="\t", diff --git a/tests/plugins/generic/generic_helpers.py b/tests/plugins/generic/generic_helpers.py index af2bcd88..e1100169 100644 --- a/tests/plugins/generic/generic_helpers.py +++ b/tests/plugins/generic/generic_helpers.py @@ -1,8 +1,10 @@ # stdlib +import urllib.error from typing import Dict, List, Optional, Type # third party import pandas as pd +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed # synthcity absolute from synthcity.plugins import Plugin @@ -29,7 +31,19 @@ def from_serde() -> Plugin: return [from_api(), from_module(), from_serde()] +@retry( + stop=stop_after_attempt(5), # Stop after 5 attempts in total (initial call + up to 4 retries) + wait=wait_fixed(2), # Wait 2 seconds between retries + retry=retry_if_exception_type(urllib.error.HTTPError), # Retry only on HTTPError; other exceptions propagate immediately +) def get_airfoil_dataset() -> pd.DataFrame: + """ + Downloads the Airfoil Self-Noise dataset and returns it as a DataFrame. + + Returns: + pd.DataFrame: The Airfoil Self-Noise dataset. 
+ """ + # Read the dataset from the URL df = pd.read_csv( "https://archive.ics.uci.edu/static/public/291/airfoil+self+noise.zip", sep="\t", diff --git a/tests/plugins/privacy/fhelpers.py b/tests/plugins/privacy/fhelpers.py index 0d3b4ddf..04c25ea1 100644 --- a/tests/plugins/privacy/fhelpers.py +++ b/tests/plugins/privacy/fhelpers.py @@ -1,8 +1,10 @@ # stdlib +import urllib.error from typing import Dict, List, Type # third party import pandas as pd +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed # synthcity absolute from synthcity.plugins import Plugin, Plugins @@ -25,7 +27,19 @@ def from_serde() -> Plugin: return [from_api(), from_module(), from_serde()] +@retry( + stop=stop_after_attempt(5), # Stop after 5 attempts in total (initial call + up to 4 retries) + wait=wait_fixed(2), # Wait 2 seconds between retries + retry=retry_if_exception_type(urllib.error.HTTPError), # Retry only on HTTPError; other exceptions propagate immediately +) def get_airfoil_dataset() -> pd.DataFrame: + """ + Downloads the Airfoil Self-Noise dataset and returns it as a DataFrame. + + Returns: + pd.DataFrame: The Airfoil Self-Noise dataset. 
+ """ + # Read the dataset from the URL df = pd.read_csv( "https://archive.ics.uci.edu/static/public/291/airfoil+self+noise.zip", sep="\t", diff --git a/tests/utils/test_compression.py b/tests/utils/test_compression.py index 6807da0f..d9aa984e 100644 --- a/tests/utils/test_compression.py +++ b/tests/utils/test_compression.py @@ -1,12 +1,28 @@ +# stdlib +import urllib.error + # third party import pandas as pd from sklearn.datasets import load_diabetes +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed # synthcity absolute from synthcity.utils.compression import compress_dataset, decompress_dataset +@retry( + stop=stop_after_attempt(5), # Stop after 5 attempts in total (initial call + up to 4 retries) + wait=wait_fixed(2), # Wait 2 seconds between retries + retry=retry_if_exception_type(urllib.error.HTTPError), # Retry only on HTTPError; other exceptions propagate immediately +) def get_airfoil_dataset() -> pd.DataFrame: + """ + Downloads the Airfoil Self-Noise dataset and returns it as a DataFrame. + + Returns: + pd.DataFrame: The Airfoil Self-Noise dataset. + """ + # Read the dataset from the URL df = pd.read_csv( "https://archive.ics.uci.edu/static/public/291/airfoil+self+noise.zip", sep="\t",