From 43ce3fc672c32264d7a8f2c44c11adef181629f1 Mon Sep 17 00:00:00 2001
From: skshetry <18718008+skshetry@users.noreply.github.com>
Date: Wed, 21 Aug 2024 21:42:44 +0545
Subject: [PATCH] run benchmarks from individual repositories (#536)

---
 .github/workflows/build.yml    | 223 +++++++++++++++------------------
 .pre-commit-config.yaml        |  54 ++++----
 README.md                      |  12 +-
 action.yml                     |  55 --------
 data/mnist/dataset.dvc         |   1 -
 plots.py                       |   6 +-
 pyproject.toml                 |  46 ++++---
 requirements.txt               |   4 -
 scripts/ci/gen_html.sh         |   2 +-
 scripts/ci/list_tests.sh       |   2 +-
 scripts/ci/list_tests_cloud.sh |   6 -
 setup.cfg                      |  17 ---
 tests/__init__.py              |   0
 tests/conftest.py              | 160 -----------------------
 tests/utils/__init__.py        |  56 ---------
 tests/utils/asserts.py         |  18 ---
 tests/utils/httpd.py           | 124 ------------------
 17 files changed, 158 insertions(+), 628 deletions(-)
 delete mode 100644 action.yml
 delete mode 100755 scripts/ci/list_tests_cloud.sh
 delete mode 100644 setup.cfg
 delete mode 100644 tests/__init__.py
 delete mode 100644 tests/conftest.py
 delete mode 100644 tests/utils/__init__.py
 delete mode 100644 tests/utils/asserts.py
 delete mode 100644 tests/utils/httpd.py

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f63eabb3..3eedeab8 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -68,31 +68,66 @@ jobs:
           cache-dependency-path: requirements.txt
       - name: install requirements
         run: uv pip install -r requirements.txt
+      - uses: pre-commit/action@v3.0.1
 
   gen:
     runs-on: ubuntu-latest
     outputs:
       tests: ${{ steps.tests.outputs.tests }}
+      azure-tests: ${{ steps.azure-tests.outputs.azure-tests }}
+      gs-tests: ${{ steps.gs-tests.outputs.gs-tests }}
+      s3-tests: ${{ steps.s3-tests.outputs.s3-tests }}
     steps:
-      - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
+      - uses: actions/checkout@v4
       - uses: actions/checkout@v4
         with:
          repository: ${{ env.DVC_REPOSITORY }}
          ref: ${{ env.DVC_REF }}
          path: dvc
          fetch-depth: 0
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_AZURE_REPOSITORY }}
+          ref: ${{ env.DVC_AZURE_REF }}
+          path: dvc-azure
+          fetch-depth: 0
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_GS_REPOSITORY }}
+          ref: ${{ env.DVC_GS_REF }}
+          path: dvc-gs
+          fetch-depth: 0
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_S3_REPOSITORY }}
+          ref: ${{ env.DVC_S3_REF }}
+          path: dvc-s3
+          fetch-depth: 0
       - uses: hynek/setup-cached-uv@v2
         with:
           cache-dependency-path: |
-            requirements.txt
             dvc/pyproject.toml
+            dvc-azure/pyproject.toml
+            dvc-gs/pyproject.toml
+            dvc-s3/pyproject.toml
      - name: install reqs
-        run: uv pip install -r requirements.txt "./dvc[testing]"
+        run: uv pip install "./dvc[tests]" "./dvc-azure[tests]" "./dvc-gs[tests]" "./dvc-s3[tests]"
      - id: tests
-        run: echo "tests=$(./scripts/ci/list_tests.sh)" >> $GITHUB_OUTPUT
+        working-directory: dvc/
+        run: echo "tests=$(../scripts/ci/list_tests.sh dvc/testing/benchmarks)" >> $GITHUB_OUTPUT
+      - id: azure-tests
+        working-directory: dvc-azure/
+        run: echo "azure-tests=$(../scripts/ci/list_tests.sh dvc_azure/tests/benchmarks.py)" >> $GITHUB_OUTPUT
+      - id: gs-tests
+        working-directory: dvc-gs/
+        run: echo "gs-tests=$(../scripts/ci/list_tests.sh dvc_gs/tests/benchmarks.py)" >> $GITHUB_OUTPUT
+      - id: s3-tests
+        working-directory: dvc-s3/
+        run: echo "s3-tests=$(../scripts/ci/list_tests.sh dvc_s3/tests/benchmarks.py)" >> $GITHUB_OUTPUT
+
   build:
     needs: [gen]
     timeout-minutes: 180
@@ -108,6 +143,8 @@ jobs:
         with:
           python-version: "3.12"
       - uses: actions/checkout@v4
+        with:
+          path: dvc-bench
      - uses: actions/checkout@v4
        with:
          repository: ${{ env.DVC_REPOSITORY }}
@@ -116,11 +153,9 @@ jobs:
          ref: ${{ env.DVC_REF }}
          path: dvc
          fetch-depth: 0
      - uses: hynek/setup-cached-uv@v2
        with:
-          cache-dependency-path: |
-            requirements.txt
-            dvc/pyproject.toml
+          cache-dependency-path: dvc/pyproject.toml
      - name: install requirements
-        run: uv pip install -r requirements.txt "./dvc[testing]"
+        run: uv pip install "./dvc[tests]"
      - uses: actions/cache/restore@v4
        id: restore-cache
        with:
@@ -128,7 +163,14 @@ jobs:
          key: ${{ env.DATASET }}
      - name: run benchmarks
        shell: bash
-        run: pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func --dvc-revs ${REVS} --pyargs ${{ matrix.test.path }} --dataset ${DATASET} --dvc-bench-git-repo $(pwd)
+        working-directory: dvc/
+        run: >
+          pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
+          ${{ matrix.test.path }}
+          --dvc-revs ${REVS}
+          --dataset ${DATASET}
+          --dvc-bench-repo ../dvc-bench --dvc-repo $(pwd)
+          -W ignore
      - if: ${{ steps.restore-cache.outputs.cache-hit != 'true' && matrix.test.name == 'test_add_copy' }}
        uses: actions/cache/save@v4
        with:
@@ -138,47 +180,15 @@ jobs:
        uses: actions/upload-artifact@v4
        with:
          name: .benchmarks-${{ matrix.test.name }}
-          path: .benchmarks
-
-  gen_s3:
-    runs-on: ubuntu-latest
-    outputs:
-      tests: ${{ steps.tests.outputs.tests }}
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-      - uses: actions/checkout@v4
-        with:
-          repository: ${{ env.DVC_REPOSITORY }}
-          ref: ${{ env.DVC_REF }}
-          path: dvc
-          fetch-depth: 0
-      - uses: actions/checkout@v4
-        with:
-          repository: ${{ env.DVC_S3_REPOSITORY }}
-          ref: ${{ env.DVC_S3_REF }}
-          path: dvc-s3
-          fetch-depth: 0
-      - uses: hynek/setup-cached-uv@v2
-        with:
-          cache-dependency-path: |
-            requirements.txt
-            dvc/pyproject.toml
-            dvc-s3/pyproject.toml
-      - name: install reqs
-        run: uv pip install -r requirements.txt "./dvc[testing]" "./dvc-s3[tests]"
-      - id: tests
-        run: echo "tests=$(./scripts/ci/list_tests_cloud.sh s3)" >> $GITHUB_OUTPUT
+          path: dvc/.benchmarks
 
   build_s3:
     if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
-    needs: [gen_s3]
+    needs: [gen]
    strategy:
      fail-fast: false
      matrix:
-        test: ${{fromJson(needs.gen_s3.outputs.tests)}}
+        test: ${{fromJson(needs.gen.outputs.s3-tests)}}
    runs-on: ubuntu-latest
    name: run ${{ matrix.test.name }}
    timeout-minutes: 480
@@ -197,6 +207,8 @@ jobs:
        with:
          python-version: "3.12"
      - uses: actions/checkout@v4
+        with:
+          path: dvc-bench
      - uses: actions/checkout@v4
        with:
          repository: ${{ env.DVC_REPOSITORY }}
@@ -212,11 +224,10 @@ jobs:
      - uses: hynek/setup-cached-uv@v2
        with:
          cache-dependency-path: |
-            requirements.txt
            dvc/pyproject.toml
            dvc-s3/pyproject.toml
      - name: install requirements
-        run: uv pip install -r requirements.txt "./dvc[testing]" "./dvc-s3[tests]"
+        run: uv pip install "./dvc[tests]" "./dvc-s3[tests]"
      - uses: actions/cache/restore@v4
        with:
          path: .dvc/cache
@@ -227,52 +238,28 @@ jobs:
          key: ${{ env.DATASET }}
          echo "DVC_TEST_AWS_REPO_BUCKET=dvc-bench-ci" >> "$GITHUB_ENV"
      - name: run benchmarks
        shell: bash
-        run: pytest --benchmark-save ${{ matrix.test.name}} --benchmark-group-by func --dvc-revs ${CLOUD_REVS} --dvc-install-deps s3 --pyargs ${{ matrix.test.path }} --dataset ${DATASET} --dvc-bench-git-repo $(pwd)
+        working-directory: dvc-s3/
+        run: >
+          pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
+          ${{ matrix.test.path }}
+          --dvc-revs ${CLOUD_REVS}
+          --dataset ${DATASET}
+          --dvc-install-deps s3
+          --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc
+          -W ignore
      - name: upload raw results
        uses: actions/upload-artifact@v4
        with:
          name: .benchmarks-${{ matrix.test.name }}
-          path: .benchmarks
-
-  gen_azure:
-    runs-on: ubuntu-latest
-    outputs:
-      tests: ${{ steps.tests.outputs.tests }}
-    steps:
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-      - uses: actions/checkout@v4
-      - uses: actions/checkout@v4
-        with:
-          repository: ${{ env.DVC_REPOSITORY }}
-          ref: ${{ env.DVC_REF }}
-          path: dvc
-          fetch-depth: 0
-      - uses: actions/checkout@v4
-        with:
-          repository: ${{ env.DVC_AZURE_REPOSITORY }}
-          ref: ${{ env.DVC_AZURE_REF }}
-          path: dvc-azure
-          fetch-depth: 0
-      - uses: hynek/setup-cached-uv@v2
-        with:
-          cache-dependency-path: |
-            requirements.txt
-            dvc/pyproject.toml
-            dvc-azure/pyproject.toml
-      - name: install requirements
-        run: uv pip install -r requirements.txt "./dvc[testing]" "./dvc-azure[tests]"
-      - id: tests
-        run: echo "tests=$(./scripts/ci/list_tests_cloud.sh azure)" >> $GITHUB_OUTPUT
+          path: dvc-s3/.benchmarks
 
   build_azure:
    if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
-    needs: [gen_azure]
+    needs: [gen]
    strategy:
      fail-fast: false
      matrix:
-        test: ${{fromJson(needs.gen_azure.outputs.tests)}}
+        test: ${{fromJson(needs.gen.outputs.azure-tests)}}
    runs-on: ubuntu-latest
    name: run ${{ matrix.test.name }}
    timeout-minutes: 480
@@ -285,6 +272,8 @@ jobs:
        with:
          python-version: "3.12"
      - uses: actions/checkout@v4
+        with:
+          path: dvc-bench
      - uses: actions/checkout@v4
        with:
          repository: ${{ env.DVC_REPOSITORY }}
@@ -300,7 +289,6 @@ jobs:
      - uses: hynek/setup-cached-uv@v2
        with:
          cache-dependency-path: |
-            requirements.txt
            dvc/pyproject.toml
            dvc-azure/pyproject.toml
      - name: 'Az CLI login'
@@ -318,7 +306,7 @@ jobs:
          echo "::add-mask::$connection_string"
          echo "DVC_TEST_AZURE_CONNECTION_STRING=$connection_string" >> $GITHUB_ENV
      - name: install requirements
-        run: uv pip install -r requirements.txt "./dvc[testing]" "./dvc-azure[tests]"
+        run: uv pip install "./dvc[tests]" "./dvc-azure[tests]"
      - uses: actions/cache/restore@v4
        with:
          path: .dvc/cache
@@ -329,53 +317,28 @@ jobs:
          echo "DVC_TEST_AZURE_PATH=az://dvc-bench-ci" >> $GITHUB_ENV
      - name: run benchmarks
        shell: bash
-        run: |
-          pytest --benchmark-save ${{ matrix.test.name}} --benchmark-group-by func --dvc-revs ${CLOUD_REVS} --dvc-install-deps azure --pyargs ${{ matrix.test.path }} --dataset ${DATASET} --dvc-bench-git-repo $(pwd)
+        working-directory: dvc-azure/
+        run: >
+          pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
+          ${{ matrix.test.path }}
+          --dvc-revs ${CLOUD_REVS}
+          --dataset ${DATASET}
+          --dvc-install-deps azure
+          --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc
+          -W ignore
      - name: upload raw results
        uses: actions/upload-artifact@v4
        with:
          name: .benchmarks-${{ matrix.test.name }}
-          path: .benchmarks
-
-  gen_gs:
-    runs-on: ubuntu-latest
-    outputs:
-      tests: ${{ steps.tests.outputs.tests }}
-    steps:
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-      - uses: actions/checkout@v4
-      - uses: actions/checkout@v4
-        with:
-          repository: ${{ env.DVC_REPOSITORY }}
-          ref: ${{ env.DVC_REF }}
-          path: dvc
-          fetch-depth: 0
-      - uses: actions/checkout@v4
-        with:
-          repository: ${{ env.DVC_GS_REPOSITORY }}
-          ref: ${{ env.DVC_GS_REF }}
-          path: dvc-gs
-          fetch-depth: 0
-      - uses: hynek/setup-cached-uv@v2
-        with:
-          cache-dependency-path: |
-            requirements.txt
-            dvc/pyproject.toml
-            dvc-gs/pyproject.toml
-      - name: install reqs
-        run: uv pip install -r requirements.txt "./dvc[testing]" "./dvc-gs[tests]"
-      - id: tests
"tests=$(./scripts/ci/list_tests_cloud.sh gs)" >> $GITHUB_OUTPUT + path: dvc-azure/.benchmarks build_gs: if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }} - needs: [gen_gs] + needs: [gen] strategy: fail-fast: false matrix: - test: ${{fromJson(needs.gen_gs.outputs.tests)}} + test: ${{fromJson(needs.gen.outputs.gs-tests)}} runs-on: ubuntu-latest name: run ${{ matrix.test.name }} timeout-minutes: 480 @@ -387,6 +350,8 @@ jobs: with: python-version: "3.12" - uses: actions/checkout@v4 + with: + path: dvc-bench - uses: actions/checkout@v4 with: repository: ${{ env.DVC_REPOSITORY }} @@ -402,11 +367,10 @@ jobs: - uses: hynek/setup-cached-uv@v2 with: cache-dependency-path: | - requirements.txt dvc/pyproject.toml dvc-gs/pyproject.toml - name: install reqs - run: uv pip install -r requirements.txt "./dvc[testing]" "./dvc-gs[tests]" + run: uv pip install "./dvc[tests]" "./dvc-gs[tests]" - uses: actions/cache/restore@v4 with: path: .dvc/cache @@ -425,12 +389,21 @@ jobs: echo "DVC_TEST_GS_BUCKET=dvc-bench" >> "$GITHUB_ENV" - name: run benchmarks shell: bash - run: pytest --benchmark-save ${{ matrix.test.name}} --benchmark-group-by func --dvc-revs ${CLOUD_REVS} --dvc-install-deps gs --pyargs ${{ matrix.test.path }} --dataset ${DATASET} --dvc-bench-git-repo $(pwd) + working-directory: dvc-gs/ + run: > + pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func + ${{ matrix.test.path }} + --dvc-revs ${CLOUD_REVS} + --dataset ${DATASET} + --dvc-install-deps gs + --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc + -W ignore + ${{ matrix.test.path }} - name: upload raw results uses: actions/upload-artifact@v4 with: name: .benchmarks-${{ matrix.test.name }} - path: .benchmarks + path: dvc-gs/.benchmarks notify: if: github.event_name != 'workflow_dispatch' && github.ref == 'refs/heads/main' && failure() needs: [build, build_s3, build_azure, build_gs] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d18356fd..5210b2dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,35 +1,37 @@ repos: -- hooks: - - id: black - exclude: '^env/' - exclude: '^dvc/' - files: '.*\.py' - language_version: python3 - repo: https://github.com/psf/black - rev: 24.8.0 -- repo: https://github.com/asottile/seed-isort-config - rev: v2.2.0 - hooks: - - id: seed-isort-config -- hooks: - - id: isort - language_version: python3 - repo: https://github.com/timothycrosley/isort - rev: 5.13.2 -- hooks: - - id: flake8 - exclude: '^env/' - exclude: '^dvc/' - files: '.*\.py' - language_version: python3 - repo: https://github.com/pycqa/flake8 - rev: 7.1.1 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-json + - id: check-merge-conflict + exclude: "tests/func/utils/test_strict_yaml.py" + args: ['--assume-in-merge'] + - id: check-toml + - id: check-yaml + - id: end-of-file-fixer + - id: mixed-line-ending + - id: sort-simple-yaml + - id: trailing-whitespace +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: 'v0.6.1' + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format +- repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: ["tomli"] - hooks: - args: - -i - "2" id: beautysh + additional_dependencies: ["setuptools"] language_version: python3 repo: https://github.com/lovesegfault/beautysh rev: v6.2.1 - diff --git a/README.md 
index dc40358f..85079ac5 100644
--- a/README.md
+++ b/README.md
@@ -31,24 +31,24 @@ $ pytest -h
 ...
   --dataset=DATASET     Dataset name to use in tests (e.g. tiny/small/large/mnist/etc)
-  --remote={azure,gdrive,gs,hdfs,http,oss,s3,ssh,webdav}
-                        Remote type to use in tests
   --dvc-bin=DVC_BIN     Path to dvc binary
   --dvc-revs=DVC_REVS   Comma-separated list of DVC revisions to test (overrides `--dvc-bin`)
-  --dvc-git-repo=DVC_GIT_REPO
+  --dvc-repo=DVC_GIT_REPO
                         Path or url to dvc git repo
-  --dvc-bench-git-repo=DVC_BENCH_GIT_REPO
+  --dvc-bench-repo=DVC_BENCH_GIT_REPO
                         Path or url to dvc-bench git repo (for loading benchmark dataset)
+  --dvc-install-deps=DVC_INSTALL_DEPS
+                        Comma-separated list of DVC installation packages
   --project-rev=PROJECT_REV
                         Project revision to test
-  --project-git-repo=PROJECT_GIT_REPO
+  --project-repo=PROJECT_GIT_REPO
                         Path or url to dvc project
 ...
 ```
 
 ### Comparing results
 
 ```
-$ py.test-benchmark compare --histogram histograms/ --group-by name --sort name --csv results.csv
+$ pytest-benchmark compare --histogram histograms/ --group-by name --sort name --csv results.csv
 ```
 
 and if you want beautiful plots:
diff --git a/action.yml b/action.yml
deleted file mode 100644
index 9c52571d..00000000
--- a/action.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-name: 'DVC Benchmarks'
-description: 'Run dvc benchmarks'
-inputs:
-  pytest_options:
-    description: 'pytest options'
-    default: ""
-    required: false
-runs:
-  using: "composite"
-  steps:
-    - name: clone dvc-bench
-      uses: actions/checkout@v4
-      with:
-        path: dvc-bench
-        repository: iterative/dvc-bench
-    - uses: hynek/setup-cached-uv@v2
-    - name: install dvc-bench requirements
-      working-directory: dvc-bench
-      shell: bash
-      run: uv pip install -r requirements.txt --system
-    - name: checkout base dvc version
-      uses: actions/checkout@v4
-      with:
-        path: dvc-bench/dvc
-        ref: ${{ github.event.pull_request.base.sha }}
-        fetch-depth: 0
-    - name: install base dvc version
-      shell: bash
-      working-directory: dvc-bench/dvc
-      run: uv pip install '.[all,tests]' --system
-    - name: run benchmarks for base version
-      shell: bash
-      working-directory: dvc-bench
-      env:
-        DVC_TEST: "true"
-      run: |
-        dvc --version
-        pytest --pyargs dvc.testing.benchmarks --benchmark-autosave ${{ inputs.pytest_options }}
-    - name: checkout PR dvc version
-      uses: actions/checkout@v4
-      with:
-        path: dvc-bench/dvc
-        fetch-depth: 0
-    - name: install PR dvc version
-      shell: bash
-      working-directory: dvc-bench/dvc
-      run: uv pip install '.[all,tests]' --system
-    - name: run benchmarks for PR
-      shell: bash
-      working-directory: dvc-bench
-      env:
-        DVC_TEST: "true"
-      run: |
-        dvc --version
-        PY_COLORS=1 pytest --pyargs dvc.testing.benchmarks --benchmark-compare --benchmark-compare-fail=median:5% --benchmark-group-by name ${{ inputs.pytest_options}}
diff --git a/data/mnist/dataset.dvc b/data/mnist/dataset.dvc
index ac31d917..e4a8bd55 100644
--- a/data/mnist/dataset.dvc
+++ b/data/mnist/dataset.dvc
@@ -3,4 +3,3 @@ outs:
   size: 19258482
   nfiles: 70000
   path: dataset
-
diff --git a/plots.py b/plots.py
index e09cc6a2..62a57d2d 100644
--- a/plots.py
+++ b/plots.py
@@ -1,7 +1,7 @@
 import os
 
 import pandas as pd
-from distutils.version import StrictVersion
+from packaging.version import Version
 
 df = pd.read_csv("results.csv")
 df["test"] = df["name"].str.extract(r"::(.*)\[")
@@ -9,9 +9,9 @@
 
 def version(x):
     try:
-        return StrictVersion(x)
+        return Version(x)
     except ValueError:
-        return StrictVersion("99.99.99")
+        return Version("99.99.99")
 
 
 os.makedirs("plots", exist_ok=True)
diff --git a/pyproject.toml b/pyproject.toml
index cfc88dba..e20d0ece 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,27 +1,23 @@
-[tool.black]
-line-length = 79
-include = '\.pyi?$'
-exclude = '''
-/(
-    \.eggs
-  | \.git
-  | \.hg
-  | \.mypy_cache
-  | \.tox
-  | \.venv
-  | html
-  | dvc
-  | results
-  | envs
-  | _build
-  | buck-out
-  | build
-  | dist
-)/
-'''
+[tool.ruff]
+output-format = "full"
+show-fixes = true
 
-[tool.pytest.ini_options]
-addopts = "-ra"
-testpaths = [
-    "tests",
+[tool.ruff.lint]
+ignore = [
+    "N818", "S101", "ISC001", "PT007", "RET502", "RET503", "SIM105", "SIM108", "SIM117",
+    "TRY003", "TRY300", "PERF203", "PLR2004", "PLW2901", "LOG007",
 ]
+select = [
+    "F", "E", "W", "C90", "I", "N", "UP", "YTT", "ASYNC", "S", "BLE", "B", "A", "C4", "DTZ", "T10",
+    "EXE", "ISC", "ICN", "LOG", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET",
+    "SLOT", "SIM", "TID", "TCH", "ARG", "PGH", "PLC", "PLE", "PLR", "PLW", "TRY",
+    "FLY", "PERF", "FURB", "RUF", "RUF022", "RUF023", "RUF024", "RUF026", "RUF027", "RUF029", "RUF030",
+]
+preview = true
+explicit-preview-rules = true
+
+[tool.ruff.lint.flake8-type-checking]
+strict = true
+
+[tool.ruff.lint.flake8-unused-arguments]
+ignore-variadic-names = true
diff --git a/requirements.txt b/requirements.txt
index fe134169..27539553 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,5 @@
-virtualenv
-dvc[s3,tests]>=3.48.2
 pytest-benchmark[histogram]
 pygal>=3.0.3
-importlib_metadata
 ansi2html
 csv2md
-funcy
 pandas
diff --git a/scripts/ci/gen_html.sh b/scripts/ci/gen_html.sh
index b4fd5269..8d9e0306 100755
--- a/scripts/ci/gen_html.sh
+++ b/scripts/ci/gen_html.sh
@@ -19,7 +19,7 @@ cat raw >> report.md
 
 for file in $(find .benchmarks -type f | sort); do
     rm -rf tmp_html results.csv raw
-    PY_COLORS=1 py.test-benchmark compare $file --histogram histograms/ --group-by func --csv results.csv --sort name >> raw
+    PY_COLORS=1 pytest-benchmark compare $file --histogram histograms/ --group-by func --csv results.csv --sort name >> raw
     dvc repro --no-run-cache
     dvc plots show -o tmp_html
     cat tmp_html/index.html >> html/index.html
diff --git a/scripts/ci/list_tests.sh b/scripts/ci/list_tests.sh
index 8c45fbd3..e20f564d 100755
--- a/scripts/ci/list_tests.sh
+++ b/scripts/ci/list_tests.sh
@@ -2,4 +2,4 @@
 
 set -e
 
-pytest --collect-only -q --pyargs dvc.testing.benchmarks | grep test_ | sed -E 's/([A-Za-z\/_]+)(\.py)(::([A-Za-z0-9_]+))?.*/dvc.testing.benchmarks.\1\3 \4/' | sed 's/\//./g' | jq -R -s -c 'split("\n")[:-1] | map({path: . | split(" ")[0], name: . | split(" ")[-1]})'
+pytest --collect-only -q $1 | head -n -2 | sed 's/\[[^]]*\]//g' | jq -Rcs 'split("\n")[:-1] | map(. as $p | split("::") | {path: $p, name: .[1]})'
diff --git a/scripts/ci/list_tests_cloud.sh b/scripts/ci/list_tests_cloud.sh
deleted file mode 100755
index 8b4ca90d..00000000
--- a/scripts/ci/list_tests_cloud.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-set -e
-
-PLUGIN="dvc_$1"
-pytest --collect-only -q --pyargs "$PLUGIN.tests.benchmarks" | grep test_ | sed -E "s/::([A-Za-z0-9_]+).*/$PLUGIN.tests.benchmarks::\\1 \\1/" | sed 's/\//./g' | jq -R -s -c 'split("\n")[:-1] | map({path: . | split(" ")[0], name: . | split(" ")[-1]})'
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index bb991f0b..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,17 +0,0 @@
-[flake8]
-ignore=
-    # Whitespace before ':'
-    E203
-    # Too many leading '#' for block comment
-    E266
-    # Line break occurred before a binary operator
-    W503
-max_line_length=79
-select=B,C,E,F,W,T4,B9
-
-[isort]
-include_trailing_comma=true
-known_first_party=benchmarks,dvc
-known_third_party=RangeHTTPServer,distutils,funcy,pandas,pytest
-line_length=79
-multi_line_output=3
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/conftest.py b/tests/conftest.py
deleted file mode 100644
index 90ee2fa3..00000000
--- a/tests/conftest.py
+++ /dev/null
@@ -1,160 +0,0 @@
-import os
-from contextlib import suppress
-
-import pytest
-
-from dvc.testing.fixtures import *  # noqa, pylint: disable=wildcard-import
-
-# Prevent updater and analytics from running their processes
-os.environ["DVC_TEST"] = "true"
-# Ensure progress output even when not outputting to raw sys.stderr console
-os.environ["DVC_IGNORE_ISATTY"] = "true"
-# Disable system git config
-os.environ["GIT_CONFIG_NOSYSTEM"] = "1"
-
-pytest_plugins = ["dvc.testing.plugin"]
-
-REMOTES = {
-    # remote: enabled_by_default?
-    "azure": False,
-    "gdrive": False,
-    "gs": False,
-    "hdfs": False,
-    "http": True,
-    "oss": False,
-    "s3": False,
-    "ssh": True,
-    "webdav": True,
-}
-
-
-@pytest.fixture(autouse=True)
-def reset_loglevel(request, caplog):
-    """
-    Use it to ensure log level at the start of each test
-    regardless of dvc.logger.setup(), Repo configs or whatever.
-    """
-    ini_opt = None
-    with suppress(ValueError):
-        ini_opt = request.config.getini("log_level")
-
-    level = request.config.getoption("--log-level") or ini_opt
-    if level:
-        with caplog.at_level(level.upper(), logger="dvc"):
-            yield
-    else:
-        yield
-
-
-@pytest.fixture(autouse=True)
-def enable_ui():
-    from dvc.ui import ui
-
-    ui.enable()
-
-
-def _get_opt(remote_name, action):
-    return f"--{action}-{remote_name}"
-
-
-def pytest_addoption(parser):
-    """Adds remote-related flags to selectively disable/enable for tests
-    Eg: If some remotes, eg: ssh is enabled to be tested for by default
-    (see above `REMOTES`), then, `--disable-ssh` flag is added. If remotes
-    like `hdfs` are disabled by default, `--enable-hdfs` is added to make them
-    run.
-
-    You can also make everything run-by-default with `--all` flag, which takes
-    precedence on all previous `--enable-*`/`--disable-*` flags.
-    """
-    parser.addoption(
-        "--all",
-        action="store_true",
-        default=False,
-        help="Test all of the remotes, unless other flags also supplied",
-    )
-    for remote_name in REMOTES:
-        for action in ("enable", "disable"):
-            opt = _get_opt(remote_name, action)
-            parser.addoption(
-                opt,
-                action="store_true",
-                default=None,
-                help=f"{action} tests for {remote_name}",
-            )
-
-    parser.addoption(
-        "--remote",
-        choices=list(REMOTES.keys()),
-        default="local",
-        help="Remote type to use in tests",
-    )
-
-
-class DVCTestConfig:
-    def __init__(self):
-        self.enabled_remotes = set()
-        self.remote = "local"
-
-    def requires(self, remote_name):
-        if remote_name not in REMOTES or remote_name in self.enabled_remotes:
-            return
-
-        pytest.skip(f"{remote_name} tests not enabled through CLI")
-
-    def apply_marker(self, marker):
-        self.requires(marker.name)
-
-
-def pytest_runtest_setup(item):
-    # Apply test markers to skip tests selectively
-    # NOTE: this only works on individual tests,
-    # for fixture, use `test_config` fixture and
-    # run `test_config.requires(remote_name)`.
-    for marker in item.iter_markers():
-        item.config.dvc_config.apply_marker(marker)
-
-    if (
-        "CI" in os.environ
-        and item.get_closest_marker("needs_internet") is not None
-    ):
-        # remotes that need internet connection might be flaky,
-        # so we rerun them in case it fails.
-        item.add_marker(pytest.mark.flaky(max_runs=5, min_passes=1))
-
-
-@pytest.fixture(scope="session")
-def test_config(request):
-    return request.config.dvc_config
-
-
-def pytest_configure(config):
-    config.dvc_config = DVCTestConfig()
-
-    for remote_name in REMOTES:
-        config.addinivalue_line(
-            "markers", f"{remote_name}: mark test as requiring {remote_name}"
-        )
-
-    enabled_remotes = config.dvc_config.enabled_remotes
-    if config.getoption("--all"):
-        enabled_remotes.update(REMOTES)
-    else:
-        default_enabled = {k for k, v in REMOTES.items() if v}
-        enabled_remotes.update(default_enabled)
-
-    for remote_name in REMOTES:
-        enabled_opt = _get_opt(remote_name, "enable")
-        disabled_opt = _get_opt(remote_name, "disable")
-
-        enabled = config.getoption(enabled_opt)
-        disabled = config.getoption(disabled_opt)
-        if disabled and enabled:
-            continue  # default behavior if both flags are supplied
-
-        if disabled:
-            enabled_remotes.discard(remote_name)
-        if enabled:
-            enabled_remotes.add(remote_name)
-
-    config.dvc_config.remote = config.getoption("--remote")
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
deleted file mode 100644
index c14cc7ae..00000000
--- a/tests/utils/__init__.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import csv
-import os
-from contextlib import contextmanager
-
-import pytest
-from funcy import first
-
-from dvc.scm import Git
-
-# rewrite assertions in assert, pytest does not rewrite for other modules
-# than tests itself.
-pytest.register_assert_rewrite("tests.utils.asserts")
-
-
-def get_gitignore_content():
-    with open(Git.GITIGNORE) as gitignore:
-        return gitignore.read().splitlines()
-
-
-@contextmanager
-def cd(newdir):
-    prevdir = os.getcwd()
-    os.chdir(os.path.expanduser(newdir))
-    try:
-        yield
-    finally:
-        os.chdir(prevdir)
-
-
-def to_posixpath(path):
-    return path.replace("\\", "/")
-
-
-def dump_sv(stream, metrics, delimiter=",", header=True):
-    if header:
-        writer = csv.DictWriter(
-            stream, fieldnames=list(first(metrics).keys()), delimiter=delimiter
-        )
-        writer.writeheader()
-        writer.writerows(metrics)
-    else:
-        writer = csv.writer(stream)
-        for d in metrics:
-            writer.writerow(list(d.values()))
-
-
-def clean_staging():
-    from dvc.fs.memory import MemoryFileSystem
-    from dvc.objects.stage import _STAGING_MEMFS_PATH
-
-    try:
-        MemoryFileSystem().fs.rm(
-            f"memory://{_STAGING_MEMFS_PATH}", recursive=True
-        )
-    except FileNotFoundError:
-        pass
diff --git a/tests/utils/asserts.py b/tests/utils/asserts.py
deleted file mode 100644
index 084ee850..00000000
--- a/tests/utils/asserts.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from typing import Any, Dict
-from unittest.mock import ANY, Mock
-
-
-def issubset(subset: Dict, superset: Dict) -> bool:
-    assert {**superset, **subset} == superset
-    return True
-
-
-def called_once_with_subset(m: Mock, *args: Any, **kwargs: Any) -> bool:
-    m.assert_called_once()
-    m_args, m_kwargs = m.call_args
-
-    expected_args = m_args + (ANY,) * (len(m_args) - len(args))
-    expected_kwargs = {k: kwargs.get(k, ANY) for k in m_kwargs}
-    m.assert_called_with(*expected_args, **expected_kwargs)
-
-    return True
diff --git a/tests/utils/httpd.py b/tests/utils/httpd.py
deleted file mode 100644
index 44c0d7bf..00000000
--- a/tests/utils/httpd.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import hashlib
-import os
-import sys
-import threading
-from contextlib import contextmanager
-from http import HTTPStatus
-from http.server import HTTPServer
-
-from RangeHTTPServer import RangeRequestHandler
-
-
-class TestRequestHandler(RangeRequestHandler):
-    def __init__(self, *args, **kwargs):
-        # NOTE: `directory` was introduced in 3.7
-        if sys.version_info < (3, 7):
-            self.directory = kwargs.pop("directory", None) or os.getcwd()
-        super().__init__(*args, **kwargs)
-
-    def translate_path(self, path):
-        import posixpath
-        import urllib
-
-        # NOTE: `directory` was introduced in 3.7
-        if sys.version_info >= (3, 7):
-            return super().translate_path(path)
-
-        path = path.split("?", 1)[0]
-        path = path.split("#", 1)[0]
-        # Don't forget explicit trailing slash when normalizing. Issue17324
-        trailing_slash = path.rstrip().endswith("/")
-        try:
-            path = urllib.parse.unquote(path, errors="surrogatepass")
-        except UnicodeDecodeError:
-            path = urllib.parse.unquote(path)
-        path = posixpath.normpath(path)
-        words = path.split("/")
-        words = filter(None, words)
-        path = self.directory
-        for word in words:
-            if os.path.dirname(word) or word in (os.curdir, os.pardir):
-                # Ignore components that are not a simple file/directory name
-                continue
-            path = os.path.join(path, word)
-        if trailing_slash:
-            path += "/"
-        return path
-
-    def end_headers(self):
-        # RangeRequestHandler only sends Accept-Ranges header if Range header
-        # is present, see https://github.com/danvk/RangeHTTPServer/issues/23
-        if not self.headers.get("Range"):
-            self.send_header("Accept-Ranges", "bytes")
-
-        # Add a checksum header
-        file = self.translate_path(self.path)
-
-        if not os.path.isdir(file) and os.path.exists(file):
-            with open(file) as fd:
-                encoded_text = fd.read().encode("utf8")
-                checksum = hashlib.md5(encoded_text).hexdigest()
-                self.send_header("Content-MD5", checksum)
-
-        RangeRequestHandler.end_headers(self)
-
-    def _chunks(self):
-        while True:
-            data = self.rfile.readline(65537)
-            chunk_size = int(data[:-2], 16)
-            if chunk_size == 0:
-                return
-            data = self.rfile.read(chunk_size)
-            yield data
-            self.rfile.read(2)
-
-    def do_POST(self):
-        chunked = self.headers.get("Transfer-Encoding", "") == "chunked"
-        path = self.translate_path(self.path)
-        try:
-            os.makedirs(os.path.dirname(path), exist_ok=True)
-            with open(path, "wb") as fd:
-                if chunked:
-                    for chunk in self._chunks():
-                        fd.write(chunk)
-                else:
-                    size = int(self.headers.get("Content-Length", 0))
-                    fd.write(self.rfile.read(size))
-        except OSError as e:
-            self.send_error(HTTPStatus.INTERNAL_SERVER_ERROR, str(e))
-        self.send_response(HTTPStatus.OK)
-        self.end_headers()
-
-
-@contextmanager
-def run_server_on_thread(server):
-    thread = threading.Thread(target=server.serve_forever)
-    thread.daemon = True
-    thread.start()
-
-    yield server
-
-    server.socket.close()
-    server.shutdown()
-    server.server_close()
-
-
-class StaticFileServer:
-    _lock = threading.Lock()
-
-    def __init__(self, directory):
-        from functools import partial
-
-        addr = ("localhost", 0)
-        req = partial(TestRequestHandler, directory=directory)
-        server = HTTPServer(addr, req)
-        self.runner = run_server_on_thread(server)
-
-    # pylint: disable=no-member
-    def __enter__(self):
-        self._lock.acquire()
-        return self.runner.__enter__()
-
-    def __exit__(self, *args):
-        self.runner.__exit__(*args)
-        self._lock.release()