From 918a664f8192aea1075cb9d5ba611647477e4df6 Mon Sep 17 00:00:00 2001 From: Rob Davis Date: Fri, 13 Sep 2024 15:12:50 +0100 Subject: [PATCH] clean up --- .github/workflows/test_pr.yml | 31 +++++---------------- setup.cfg | 1 - src/synthcity/metrics/eval_performance.py | 2 +- src/synthcity/utils/compression.py | 2 +- tests/conftest.py | 10 ------- tests/metrics/test_attacks.py | 6 ---- tests/metrics/test_detection.py | 15 ++-------- tests/metrics/test_performance.py | 15 ++-------- tests/metrics/test_privacy.py | 13 +-------- tests/metrics/test_sanity.py | 15 ++-------- tests/metrics/test_statistical.py | 15 ++-------- tests/plugins/core/models/test_convnet.py | 16 +---------- tests/plugins/core/models/test_image_gan.py | 14 +--------- tests/plugins/core/test_dataloader.py | 28 +++---------------- tests/plugins/images/test_image_adsgan.py | 25 +++-------------- tests/plugins/images/test_image_cgan.py | 25 +++-------------- tests/plugins/privacy/test_adsgan.py | 1 - tests/plugins/privacy/test_aim.py | 2 -- 18 files changed, 32 insertions(+), 204 deletions(-) diff --git a/.github/workflows/test_pr.yml b/.github/workflows/test_pr.yml index 9c97481f..7d62b6cd 100644 --- a/.github/workflows/test_pr.yml +++ b/.github/workflows/test_pr.yml @@ -37,58 +37,41 @@ jobs: strategy: matrix: python-version: ["3.11"] - os: [macos-latest, ubuntu-latest] + os: [macos-latest, ubuntu-latest, windows-latest] steps: - uses: actions/checkout@v3 with: submodules: true - - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} cache: "pip" - - - name: Install libomp (macOS only) - run: | - brew install libomp - if: ${{ runner.os == 'macOS' }} - - name: Set libomp environment variables (macOS only) run: | + brew install libomp LIBOMP_PATH="/opt/homebrew/opt/libomp" echo "LDFLAGS=-L${LIBOMP_PATH}/lib" >> $GITHUB_ENV echo "CPPFLAGS=-I${LIBOMP_PATH}/include" >> $GITHUB_ENV echo "DYLD_LIBRARY_PATH=${LIBOMP_PATH}/lib:\$DYLD_LIBRARY_PATH" >> $GITHUB_ENV echo "OMP_PATH=${LIBOMP_PATH}/include" >> $GITHUB_ENV if: ${{ runner.os == 'macOS' }} - - name: Install dependencies run: | python -m pip install -U pip pip install -r prereq.txt - pip install pytest-timeout - - - name: Dump GitHub Action environment - run: | - echo "OS: $(uname -a)" - python --version - pip freeze - - name: Limit OpenMP threads run: | echo "OMP_NUM_THREADS=2" >> $GITHUB_ENV - - # - name: Test Core - # run: | - # pip install .[testing] - # pip freeze - # pytest -vvvsx --timeout=2000 --timeout-method=thread -m "not slow" --durations=50 + - name: Test Core + run: | + pip install .[testing] + pip freeze + pytest -vvvsx -m "not slow" --durations=50 - name: Set macOS deployment target run: | echo "MACOSX_DEPLOYMENT_TARGET=10.13" >> $GITHUB_ENV if: ${{ runner.os == 'macOS' }} - - name: Test GOGGLE run: | pip install .[testing,goggle] diff --git a/setup.cfg b/setup.cfg index eef6d848..62c88501 100644 --- a/setup.cfg +++ b/setup.cfg @@ -89,7 +89,6 @@ testing = nbformat pytest-benchmark pytest-xdist[psutil] - pytest_timeout pytest-xprocess igraph py # pytest 7.2.0 bug https://github.com/pytest-dev/pytest-xprocess/issues/110 diff --git a/src/synthcity/metrics/eval_performance.py b/src/synthcity/metrics/eval_performance.py index 47320066..2eb45bca 100644 --- a/src/synthcity/metrics/eval_performance.py +++ b/src/synthcity/metrics/eval_performance.py @@ -1066,7 +1066,7 @@ def evaluate( model = XGBClassifier( tree_method="approx", n_jobs=2, - verbosity=2, + verbosity=0, depth=3, random_state=self._random_state, ) diff --git a/src/synthcity/utils/compression.py b/src/synthcity/utils/compression.py index fabca8fd..1fccbc09 100644 --- a/src/synthcity/utils/compression.py +++ b/src/synthcity/utils/compression.py @@ -48,7 +48,7 @@ def compress_dataset( model = model = XGBClassifier( tree_method="approx", n_jobs=2, - verbosity=2, + verbosity=0, depth=3, ) try: diff --git a/tests/conftest.py b/tests/conftest.py index f5529a6b..3b02b350 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,13 +21,3 @@ def run_before_tests() -> Generator: workspace = Path("workspace") if workspace.exists(): shutil.rmtree(workspace, ignore_errors=True) - - -# Hook to modify the test result if it exceeds a timeout -def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo) -> None: - """Modify the test result if it exceeds the timeout to skip instead of failing.""" - if call.when == "call" and call.excinfo is not None: - print(f"Call info: {call}") - # Check if the test was stopped due to a timeout using call.result - if "Timeout" in str(call.excinfo.value): - pytest.skip(f"Test skipped due to exceeding the timeout: {item.nodeid}") diff --git a/tests/metrics/test_attacks.py b/tests/metrics/test_attacks.py index 497606ed..241a6c3c 100644 --- a/tests/metrics/test_attacks.py +++ b/tests/metrics/test_attacks.py @@ -1,5 +1,4 @@ # stdlib -import sys from typing import Type # third party @@ -16,7 +15,6 @@ from synthcity.plugins.core.dataloader import GenericDataLoader -@pytest.mark.skipif(sys.platform == "darwin", reason="Test skipped on MacOS") @pytest.mark.parametrize("reduction", ["mean", "max", "min"]) @pytest.mark.parametrize( "evaluator_t", @@ -54,10 +52,6 @@ def test_reduction(reduction: str, evaluator_t: Type) -> None: assert def_score == score[reduction] -@pytest.mark.skipif( - sys.platform == "darwin", - reason="Test skipped on MacOS", -) @pytest.mark.parametrize( "evaluator_t", [ diff --git a/tests/metrics/test_detection.py b/tests/metrics/test_detection.py index b8500cba..c01a2c9f 100644 --- a/tests/metrics/test_detection.py +++ b/tests/metrics/test_detection.py @@ -1,7 +1,5 @@ # stdlib -import os import sys -from pathlib import Path from typing import Type # third party @@ -173,17 +171,8 @@ def test_detect_synth_timeseries(test_plugin: Plugin, evaluator_t: Type) -> None @pytest.mark.slow_1 @pytest.mark.slow def test_image_support_detection() -> None: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) + + dataset = datasets.MNIST(".", download=True) X1 = ImageDataLoader(dataset).sample(100) X2 = ImageDataLoader(dataset).sample(100) diff --git a/tests/metrics/test_performance.py b/tests/metrics/test_performance.py index 45824dd0..3e582112 100644 --- a/tests/metrics/test_performance.py +++ b/tests/metrics/test_performance.py @@ -1,7 +1,5 @@ # stdlib -import os import sys -from pathlib import Path from typing import Optional, Type # third party @@ -484,17 +482,8 @@ def test_evaluate_performance_time_series_survival( @pytest.mark.slow_1 @pytest.mark.slow def test_image_support_perf() -> None: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) + + dataset = datasets.MNIST(".", download=True) X1 = ImageDataLoader(dataset).sample(100) X2 = ImageDataLoader(dataset).sample(100) diff --git a/tests/metrics/test_privacy.py b/tests/metrics/test_privacy.py index 40dac9b6..dd598d4c 100644 --- a/tests/metrics/test_privacy.py +++ b/tests/metrics/test_privacy.py @@ -1,7 +1,5 @@ # stdlib -import os import sys -from pathlib import Path from typing import Type # third party @@ -86,17 +84,8 @@ def test_evaluator(evaluator_t: Type, test_plugin: Plugin) -> None: @pytest.mark.skipif(sys.platform != "linux", reason="Linux only for faster results") def test_image_support() -> None: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) + dataset = datasets.MNIST(".", download=True) X1 = ImageDataLoader(dataset).sample(100) X2 = ImageDataLoader(dataset).sample(100) diff --git a/tests/metrics/test_sanity.py b/tests/metrics/test_sanity.py index bc4660d6..caabfcb7 100644 --- a/tests/metrics/test_sanity.py +++ b/tests/metrics/test_sanity.py @@ -1,7 +1,5 @@ # stdlib -import os import sys -from pathlib import Path from typing import Callable, Tuple # third party @@ -199,17 +197,8 @@ def test_evaluate_distant_values(test_plugin: Plugin) -> None: @pytest.mark.skipif(sys.platform != "linux", reason="Linux only for faster results") def test_image_support() -> None: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) + + dataset = datasets.MNIST(".", download=True) X1 = ImageDataLoader(dataset).sample(100) X2 = ImageDataLoader(dataset).sample(100) diff --git a/tests/metrics/test_statistical.py b/tests/metrics/test_statistical.py index c0f2a822..f01bd6d5 100644 --- a/tests/metrics/test_statistical.py +++ b/tests/metrics/test_statistical.py @@ -1,7 +1,5 @@ # stdlib -import os import sys -from pathlib import Path from typing import Any, Tuple, Type # third party @@ -289,17 +287,8 @@ def test_evaluate_survival_km_distance(test_plugin: Plugin) -> None: @pytest.mark.skipif(sys.platform != "linux", reason="Linux only for faster results") def test_image_support() -> None: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) + + dataset = datasets.MNIST(".", download=True) X1 = ImageDataLoader(dataset).sample(100) X2 = ImageDataLoader(dataset).sample(100) diff --git a/tests/plugins/core/models/test_convnet.py b/tests/plugins/core/models/test_convnet.py index 3e90fd9a..71399626 100644 --- a/tests/plugins/core/models/test_convnet.py +++ b/tests/plugins/core/models/test_convnet.py @@ -1,7 +1,3 @@ -# stdlib -import os -from pathlib import Path - # third party import numpy as np import pytest @@ -67,17 +63,7 @@ def test_train_clf() -> None: transforms.Normalize(mean=(0.5,), std=(0.5,)), ] ) - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True, transform=data_transform) - else: - dataset = datasets.MNIST(mnist_dir, train=True, transform=data_transform) + dataset = datasets.MNIST(".", download=True, transform=data_transform) dataset = Subset(dataset, np.arange(len(dataset))[:100]) classes = 10 diff --git a/tests/plugins/core/models/test_image_gan.py b/tests/plugins/core/models/test_image_gan.py index 3dc457a9..a13da720 100644 --- a/tests/plugins/core/models/test_image_gan.py +++ b/tests/plugins/core/models/test_image_gan.py @@ -1,7 +1,3 @@ -# stdlib -import os -from pathlib import Path - # third party import numpy as np import pytest @@ -30,15 +26,7 @@ # Load MNIST dataset as tensors batch_size = 128 -# Get the MNIST dataset directory from an environment variable -mnist_dir = os.getenv("MNIST_DATA_DIR", ".") # Default to current directory if not set - -# Check if the MNIST dataset is already downloaded -mnist_path = Path(mnist_dir) / "MNIST" / "processed" -if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True, transform=data_transform) -else: - dataset = datasets.MNIST(mnist_dir, train=True, transform=data_transform) +dataset = datasets.MNIST(".", download=True, transform=data_transform) dataset = Subset(dataset, np.arange(len(dataset))[:100]) dataset = FlexibleDataset(dataset) diff --git a/tests/plugins/core/test_dataloader.py b/tests/plugins/core/test_dataloader.py index 67db6351..8da993fc 100644 --- a/tests/plugins/core/test_dataloader.py +++ b/tests/plugins/core/test_dataloader.py @@ -1,8 +1,6 @@ # stdlib -import os import sys from datetime import datetime -from pathlib import Path from typing import Any # third party @@ -642,17 +640,8 @@ def test_time_series_survival_pack_unpack_padding(as_numpy: bool) -> None: @pytest.mark.parametrize("height", [55, 64]) @pytest.mark.parametrize("width", [32, 22]) def test_image_dataloader_sanity(height: int, width: int) -> None: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) + + dataset = datasets.MNIST(".", download=True) loader = ImageDataLoader( data=dataset, @@ -693,17 +682,8 @@ def test_image_dataloader_sanity(height: int, width: int) -> None: @pytest.mark.skipif(sys.platform != "linux", reason="Linux only for faster results") def test_image_dataloader_create_from_info() -> None: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) + + dataset = datasets.MNIST(".", download=True) loader = ImageDataLoader( data=dataset, diff --git a/tests/plugins/images/test_image_adsgan.py b/tests/plugins/images/test_image_adsgan.py index b9320ac3..a1b6414f 100644 --- a/tests/plugins/images/test_image_adsgan.py +++ b/tests/plugins/images/test_image_adsgan.py @@ -1,7 +1,5 @@ # stdlib -import os import sys -from pathlib import Path # third party import numpy as np @@ -17,21 +15,6 @@ plugin_name = "image_adsgan" -def get_mnist() -> datasets.MNIST: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) - return dataset - - @pytest.mark.parametrize("test_plugin", generate_fixtures(plugin_name, plugin)) def test_plugin_sanity(test_plugin: Plugin) -> None: assert test_plugin is not None @@ -54,7 +37,7 @@ def test_plugin_hyperparams(test_plugin: Plugin) -> None: @pytest.mark.skipif(sys.platform != "linux", reason="Linux only for faster results") def test_plugin_fit() -> None: - dataset = get_mnist() + dataset = datasets.MNIST(".", download=True) test_plugin = plugin(n_iter=5) X = ImageDataLoader(dataset).sample(100) @@ -64,7 +47,7 @@ def test_plugin_fit() -> None: @pytest.mark.skipif(sys.platform != "linux", reason="Linux only for faster results") def test_plugin_generate() -> None: - dataset = get_mnist() + dataset = datasets.MNIST(".", download=True) test_plugin = plugin(n_iter=10, n_units_latent=13) X = ImageDataLoader(dataset).sample(100) @@ -83,7 +66,7 @@ def test_plugin_generate() -> None: @pytest.mark.slow_2 @pytest.mark.slow def test_plugin_generate_with_conditional() -> None: - dataset = get_mnist() + dataset = datasets.MNIST(".", download=True) test_plugin = plugin(n_iter=10, n_units_latent=13) X = ImageDataLoader(dataset).sample(100) @@ -100,7 +83,7 @@ def test_plugin_generate_with_conditional() -> None: @pytest.mark.slow_2 @pytest.mark.slow def test_plugin_generate_with_stop_conditional() -> None: - dataset = get_mnist() + dataset = datasets.MNIST(".", download=True) test_plugin = plugin(n_iter=10, n_units_latent=13, n_iter_print=2) X = ImageDataLoader(dataset).sample(100) diff --git a/tests/plugins/images/test_image_cgan.py b/tests/plugins/images/test_image_cgan.py index 8542aa37..fc30f84f 100644 --- a/tests/plugins/images/test_image_cgan.py +++ b/tests/plugins/images/test_image_cgan.py @@ -1,7 +1,5 @@ # stdlib -import os import sys -from pathlib import Path # third party import numpy as np @@ -17,21 +15,6 @@ plugin_name = "image_cgan" -def get_mnist() -> datasets.MNIST: - # Get the MNIST dataset directory from an environment variable - mnist_dir = os.getenv( - "MNIST_DATA_DIR", "." - ) # Default to current directory if not set - - # Check if the MNIST dataset is already downloaded - mnist_path = Path(mnist_dir) / "MNIST" / "processed" - if not mnist_path.exists(): - dataset = datasets.MNIST(mnist_dir, download=True) - else: - dataset = datasets.MNIST(mnist_dir, train=True) - return dataset - - @pytest.mark.parametrize("test_plugin", generate_fixtures(plugin_name, plugin)) def test_plugin_sanity(test_plugin: Plugin) -> None: assert test_plugin is not None @@ -57,7 +40,7 @@ def test_plugin_hyperparams(test_plugin: Plugin) -> None: @pytest.mark.slow_2 @pytest.mark.slow def test_plugin_fit(height: int) -> None: - dataset = get_mnist() + dataset = datasets.MNIST(".", download=True) test_plugin = plugin(n_iter=5) X = ImageDataLoader(dataset, height=height).sample(100) @@ -67,7 +50,7 @@ def test_plugin_fit(height: int) -> None: @pytest.mark.skipif(sys.platform != "linux", reason="Linux only for faster results") def test_plugin_generate() -> None: - dataset = get_mnist() + dataset = datasets.MNIST(".", download=True) test_plugin = plugin(n_iter=10, n_units_latent=13) X = ImageDataLoader(dataset).sample(100) @@ -84,7 +67,7 @@ def test_plugin_generate() -> None: @pytest.mark.skipif(sys.platform != "linux", reason="Linux only for faster results") def test_plugin_generate_with_conditional() -> None: - dataset = get_mnist() + dataset = datasets.MNIST(".", download=True) test_plugin = plugin(n_iter=10, n_units_latent=13) X = ImageDataLoader(dataset).sample(100) @@ -101,7 +84,7 @@ def test_plugin_generate_with_conditional() -> None: @pytest.mark.slow_2 @pytest.mark.slow def test_plugin_generate_with_stop_conditional() -> None: - dataset = get_mnist() + dataset = datasets.MNIST(".", download=True) test_plugin = plugin(n_iter=10, n_units_latent=13, n_iter_print=2) X = ImageDataLoader(dataset).sample(100) diff --git a/tests/plugins/privacy/test_adsgan.py b/tests/plugins/privacy/test_adsgan.py index 38f40c42..67493827 100644 --- a/tests/plugins/privacy/test_adsgan.py +++ b/tests/plugins/privacy/test_adsgan.py @@ -49,7 +49,6 @@ def test_plugin_fit() -> None: test_plugin.fit(X) -@pytest.mark.skipif(sys.platform != "linux", reason="Only test on linux for speed") def test_plugin_generate() -> None: test_plugin = plugin( n_iter=100, generator_n_layers_hidden=1, generator_n_units_hidden=10 diff --git a/tests/plugins/privacy/test_aim.py b/tests/plugins/privacy/test_aim.py index 08ab1b13..c132bfcb 100644 --- a/tests/plugins/privacy/test_aim.py +++ b/tests/plugins/privacy/test_aim.py @@ -1,6 +1,5 @@ # stdlib import random -import sys from datetime import datetime, timedelta # third party @@ -162,7 +161,6 @@ def gen_datetime(min_year: int = 2000, max_year: int = datetime.now().year) -> d return start + (end - start) * random.random() -@pytest.mark.skipif(sys.platform == "darwin", reason="Linux only for faster results") def test_plugin_encoding() -> None: assert plugin is not None data = [[gen_datetime(), i % 2 == 0, i] for i in range(10)]