build #2328
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow name as shown in the Actions UI.
name: build

# Triggers: every pull request, pushes to main, a daily cron, and manual runs.
on:
  pull_request: {}
  push:
    branches: [main]
  schedule:
    # Fires once a day at minute 0 of hour 0.
    - cron: "0 0 * * *"
  workflow_dispatch:
    # Options offered when the workflow is started manually.
    inputs:
      dataset:
        description: 'Dataset Size'
        type: choice
        required: false
        default: 'small'
        options:
          - tiny
          - small
          - large
          - mnist
          - openorca
          - used-cars
          - youtube-trending
          - meta-kaggle
          - chest-xray-pneumonia
      revs:
        description: 'Comma-separated list of DVC revisions'
        type: string
        required: false
        default: ''
      clouds:
        # Gates the build_s3 / build_azure / build_gs jobs on manual runs.
        description: 'Run s3/gs/azure benchmarks'
        type: boolean
        required: false
        default: false
      host_dvc_rev:
        description: 'DVC revision to use for running the tests'
        type: string
        required: false
        default: 'main'
      tests_to_run:
        description: 'Comma-separated list of names of tests to run. If left empty (default), it will run all the tests'
        type: string
        required: false
        default: ''
      cprofile:
        description: 'Enable cprofile'
        type: boolean
        required: false
        default: false
# Environment variables shared by every job in this workflow.
env:
  DVC_TEST: 'true'
  FORCE_COLOR: '1'
  # Scheduled runs always use 'mnist'; otherwise the manual `dataset` input,
  # falling back to 'small' (pull_request / push events have no inputs).
  DATASET: ${{ (github.event_name == 'schedule' && 'mnist') || github.event.inputs.dataset || 'small' }}
  # DVC revisions to benchmark; the manual `revs` input overrides the default.
  REVS: ${{ github.event.inputs.revs || 'main,3.53.2,3.10.0,2.58.2' }}
  # Cloud benchmarks default to a shorter list of revisions.
  CLOUD_REVS: ${{ github.event.inputs.revs || 'main,3.53.2' }}
  # Repositories and refs checked out by the jobs.
  DVC_REPOSITORY: iterative/dvc
  DVC_REF: ${{ github.event.inputs.host_dvc_rev || 'main' }}
  DVC_AZURE_REPOSITORY: iterative/dvc-azure
  DVC_AZURE_REF: main
  DVC_GS_REPOSITORY: iterative/dvc-gs
  DVC_GS_REF: main
  DVC_S3_REPOSITORY: iterative/dvc-s3
  DVC_S3_REF: main
  UV_SYSTEM_PYTHON: 'true'
  # Empty string means "no filter" for the test-listing script.
  TESTS_TO_RUN: ${{ github.event.inputs.tests_to_run || '' }}
  # Extra pytest flag, non-empty only when the `cprofile` input is enabled;
  # steps also use its truthiness to decide whether to upload profiles.
  CPROFILE_ARGS: ${{ github.event.inputs.cprofile == 'true' && '--dvc-benchmark-cprofile-dump' || '' }}
# Default GITHUB_TOKEN scope; jobs that declare their own `permissions:`
# (the cloud, compare and deploy jobs) use that mapping instead.
permissions:
  contents: read

# Job definitions follow as children of this key.
jobs:
# lint (child of the top-level `jobs:` key): installs requirements.txt and
# runs the pre-commit action against this repository.
lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'
    - uses: astral-sh/setup-uv@v4
      with:
        cache-dependency-glob: requirements.txt
        enable-cache: true
    - name: install requirements
      run: uv pip install -r requirements.txt
    # NOTE(review): the ref after `pre-commit/action@` was replaced by
    # "[email protected]" (email obfuscation) in this copy of the file;
    # restore the pinned version from the upstream workflow.
    - uses: pre-commit/[email protected]
# gen (child of the top-level `jobs:` key): checks out dvc and the three cloud
# plugins, installs them, and publishes one test list per suite as a job
# output. Downstream jobs parse these outputs with fromJson() to build their
# matrices.
gen:
  runs-on: ubuntu-latest
  outputs:
    tests: ${{ steps.tests.outputs.tests }}
    azure-tests: ${{ steps.azure-tests.outputs.azure-tests }}
    gs-tests: ${{ steps.gs-tests.outputs.gs-tests }}
    s3-tests: ${{ steps.s3-tests.outputs.s3-tests }}
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    # This repository lands in the workspace root; the others go in subfolders.
    - uses: actions/checkout@v4
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_AZURE_REPOSITORY }}
        ref: ${{ env.DVC_AZURE_REF }}
        path: dvc-azure
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_GS_REPOSITORY }}
        ref: ${{ env.DVC_GS_REF }}
        path: dvc-gs
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_S3_REPOSITORY }}
        ref: ${{ env.DVC_S3_REF }}
        path: dvc-s3
        fetch-depth: 0
    - uses: astral-sh/setup-uv@v4
      with:
        enable-cache: true
        cache-dependency-glob: |
          dvc/pyproject.toml
          dvc-azure/pyproject.toml
          dvc-gs/pyproject.toml
          dvc-s3/pyproject.toml
    - name: install reqs
      run: uv pip install "./dvc[tests]" "./dvc-azure[tests]" "./dvc-gs[tests]" "./dvc-s3[tests]"
    # The four steps below read TESTS_TO_RUN from the process environment
    # (it is defined in the workflow-level `env:`) instead of splicing
    # `${{ env.TESTS_TO_RUN }}` into the script text. The value originates
    # from a workflow_dispatch input, so expanding it at runtime inside
    # double quotes keeps quotes or shell metacharacters in the input from
    # being interpreted as part of the command.
    - id: tests
      working-directory: dvc/
      run: echo "tests=$(../scripts/ci/list_tests.sh dvc/testing/benchmarks "${TESTS_TO_RUN}")" >> "$GITHUB_OUTPUT"
    - id: azure-tests
      working-directory: dvc-azure/
      run: echo "azure-tests=$(../scripts/ci/list_tests.sh dvc_azure/tests/benchmarks.py "${TESTS_TO_RUN}")" >> "$GITHUB_OUTPUT"
    - id: gs-tests
      working-directory: dvc-gs/
      run: echo "gs-tests=$(../scripts/ci/list_tests.sh dvc_gs/tests/benchmarks.py "${TESTS_TO_RUN}")" >> "$GITHUB_OUTPUT"
    - id: s3-tests
      working-directory: dvc-s3/
      run: echo "s3-tests=$(../scripts/ci/list_tests.sh dvc_s3/tests/benchmarks.py "${TESTS_TO_RUN}")" >> "$GITHUB_OUTPUT"
# build (child of the top-level `jobs:` key): one matrix entry per test listed
# by the `gen` job; runs the local benchmarks and uploads the raw results.
build:
  name: run ${{ matrix.test.name }}
  needs: gen
  runs-on: ${{ matrix.os }}
  timeout-minutes: 180
  strategy:
    # Let the remaining benchmarks finish when one of them fails.
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest
      test: ${{fromJson(needs.gen.outputs.tests)}}
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'
    # This repository goes to ./dvc-bench, DVC itself to ./dvc.
    - uses: actions/checkout@v4
      with:
        path: dvc-bench
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - working-directory: dvc
      run: ../dvc-bench/scripts/ci/fetch-all-revs.sh "${REVS}"
    - uses: astral-sh/setup-uv@v4
      with:
        cache-dependency-glob: dvc/pyproject.toml
        enable-cache: true
    - name: install requirements
      run: uv pip install "./dvc[tests]"
    # Restore the DVC cache for the selected dataset, keyed by dataset name.
    - id: restore-cache
      uses: actions/cache/restore@v4
      with:
        key: ${{ env.DATASET }}
        path: dvc-bench/.dvc/cache
    - name: run benchmarks
      working-directory: dvc/
      shell: bash
      # Folded scalar: the lines below are joined into one pytest command.
      run: >
        pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
        ${{ matrix.test.path }}
        --dvc-revs ${REVS}
        --dataset ${DATASET}
        --dvc-bench-repo ../dvc-bench --dvc-repo $(pwd)
        -W ignore
        ${{ env.CPROFILE_ARGS }}
    # Only the `test_add_copy` matrix entry saves the cache, and only on a miss.
    - if: steps.restore-cache.outputs.cache-hit != 'true' && matrix.test.name == 'test_add_copy'
      uses: actions/cache/save@v4
      with:
        key: ${{ steps.restore-cache.outputs.cache-primary-key }}
        path: dvc-bench/.dvc/cache
    - name: upload raw results
      uses: actions/upload-artifact@v4
      with:
        name: .benchmarks-${{ matrix.test.name }}
        path: dvc/.benchmarks
        include-hidden-files: true
    # Profiles are uploaded only when profiling was requested.
    - name: upload profiling results
      if: env.CPROFILE_ARGS
      uses: actions/upload-artifact@v4
      with:
        name: prof-${{ matrix.test.name }}
        path: dvc/prof
# build_s3 (child of the top-level `jobs:` key): S3 benchmarks from dvc-s3.
# Runs on the schedule or when the manual `clouds` input is enabled.
build_s3:
  name: run ${{ matrix.test.name }}
  if: github.event_name == 'schedule' || github.event.inputs.clouds == 'true'
  needs: gen
  runs-on: ubuntu-latest
  timeout-minutes: 480
  # A failing cloud benchmark does not fail the workflow run.
  continue-on-error: true
  permissions:
    id-token: write
  strategy:
    fail-fast: false
    matrix:
      test: ${{fromJson(needs.gen.outputs.s3-tests)}}
  steps:
    # AWS credentials are configured for scheduled runs only.
    - name: configure AWS credentials
      if: github.event_name == 'schedule'
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-east-1
        role-to-assume: arn:aws:iam::342840881361:role/dvc-bench-gha
        # 28800 s = 480 min, the same as this job's timeout-minutes.
        role-duration-seconds: 28800
    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'
    - uses: actions/checkout@v4
      with:
        path: dvc-bench
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - working-directory: dvc
      run: ../dvc-bench/scripts/ci/fetch-all-revs.sh "${REVS}"
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_S3_REPOSITORY }}
        ref: ${{ env.DVC_S3_REF }}
        path: dvc-s3
        fetch-depth: 0
    - uses: astral-sh/setup-uv@v4
      with:
        enable-cache: true
        cache-dependency-glob: |
          dvc/pyproject.toml
          dvc-s3/pyproject.toml
    - name: install requirements
      run: uv pip install "./dvc[tests]" "./dvc-s3[tests]"
    - uses: actions/cache/restore@v4
      with:
        key: ${{ env.DATASET }}
        path: dvc-bench/.dvc/cache
    # Exported through GITHUB_ENV so the later benchmark step sees it.
    - name: configure real S3 DVC env
      if: github.event_name == 'schedule'
      run: |
        echo "DVC_TEST_AWS_REPO_BUCKET=dvc-bench-ci" >> "$GITHUB_ENV"
    - name: run benchmarks
      working-directory: dvc-s3/
      shell: bash
      # Folded scalar: the lines below are joined into one pytest command.
      run: >
        pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
        ${{ matrix.test.path }}
        --dvc-revs ${CLOUD_REVS}
        --dataset ${DATASET}
        --dvc-install-deps s3
        --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc
        -W ignore
        ${{ env.CPROFILE_ARGS }}
    - name: upload raw results
      uses: actions/upload-artifact@v4
      with:
        name: .benchmarks-${{ matrix.test.name }}
        path: dvc-s3/.benchmarks
        include-hidden-files: true
    - name: upload profiling results
      if: env.CPROFILE_ARGS
      uses: actions/upload-artifact@v4
      with:
        name: prof-${{ matrix.test.name }}
        path: dvc-s3/prof
# build_azure (child of the top-level `jobs:` key): Azure benchmarks from
# dvc-azure. Runs on the schedule or when the manual `clouds` input is enabled.
build_azure:
  name: run ${{ matrix.test.name }}
  if: github.event_name == 'schedule' || github.event.inputs.clouds == 'true'
  needs: gen
  runs-on: ubuntu-latest
  timeout-minutes: 480
  # A failing cloud benchmark does not fail the workflow run.
  continue-on-error: true
  permissions:
    id-token: write
  # The `github-actions` environment is attached for scheduled runs only.
  environment: ${{ github.event_name == 'schedule' && 'github-actions' || ''}}
  strategy:
    fail-fast: false
    matrix:
      test: ${{fromJson(needs.gen.outputs.azure-tests)}}
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'
    - uses: actions/checkout@v4
      with:
        path: dvc-bench
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - working-directory: dvc
      run: ../dvc-bench/scripts/ci/fetch-all-revs.sh "${REVS}"
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_AZURE_REPOSITORY }}
        ref: ${{ env.DVC_AZURE_REF }}
        path: dvc-azure
        fetch-depth: 0
    - uses: astral-sh/setup-uv@v4
      with:
        enable-cache: true
        cache-dependency-glob: |
          dvc/pyproject.toml
          dvc-azure/pyproject.toml
    - name: 'Az CLI login'
      if: github.event_name == 'schedule'
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
    # Reads the storage connection string, masks it in the logs, and exports it
    # for the later steps.
    # NOTE(review): `jq -c` prints the value as a JSON string, so the exported
    # variable includes the surrounding double quotes; confirm the consumer
    # tolerates that, or whether `jq -r` was intended.
    - name: 'Set connection string'
      id: set-conn-str
      if: github.event_name == 'schedule'
      run: |
        connection_string=$(az storage account show-connection-string -g dvc-bench-ci -n dvcbenchci | jq -c '.connectionString')
        echo "::add-mask::$connection_string"
        echo "DVC_TEST_AZURE_CONNECTION_STRING=$connection_string" >> $GITHUB_ENV
    - name: install requirements
      run: uv pip install "./dvc[tests]" "./dvc-azure[tests]"
    - uses: actions/cache/restore@v4
      with:
        key: ${{ env.DATASET }}
        path: dvc-bench/.dvc/cache
    - name: setup env
      if: github.event_name == 'schedule'
      run: |
        echo "DVC_TEST_AZURE_PATH=az://dvc-bench-ci" >> $GITHUB_ENV
    - name: run benchmarks
      working-directory: dvc-azure/
      shell: bash
      # Folded scalar: the lines below are joined into one pytest command.
      run: >
        pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
        ${{ matrix.test.path }}
        --dvc-revs ${CLOUD_REVS}
        --dataset ${DATASET}
        --dvc-install-deps azure
        --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc
        -W ignore
        ${{ env.CPROFILE_ARGS }}
    - name: upload raw results
      uses: actions/upload-artifact@v4
      with:
        name: .benchmarks-${{ matrix.test.name }}
        path: dvc-azure/.benchmarks
        include-hidden-files: true
    - name: upload profiling results
      if: env.CPROFILE_ARGS
      uses: actions/upload-artifact@v4
      with:
        name: prof-${{ matrix.test.name }}
        path: dvc-azure/prof
# build_gs (child of the top-level `jobs:` key): Google Cloud Storage
# benchmarks from dvc-gs. Runs on the schedule or when the manual `clouds`
# input is enabled.
build_gs:
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  needs: [gen]
  strategy:
    fail-fast: false
    matrix:
      test: ${{fromJson(needs.gen.outputs.gs-tests)}}
  runs-on: ubuntu-latest
  name: run ${{ matrix.test.name }}
  timeout-minutes: 480
  # A failing cloud benchmark does not fail the workflow run.
  continue-on-error: true
  permissions:
    id-token: write
  environment: google-cloud
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    - uses: actions/checkout@v4
      with:
        path: dvc-bench
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - run: ../dvc-bench/scripts/ci/fetch-all-revs.sh "${REVS}"
      working-directory: dvc
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_GS_REPOSITORY }}
        ref: ${{ env.DVC_GS_REF }}
        path: dvc-gs
        fetch-depth: 0
    - uses: astral-sh/setup-uv@v4
      with:
        enable-cache: true
        cache-dependency-glob: |
          dvc/pyproject.toml
          dvc-gs/pyproject.toml
    - name: install reqs
      run: uv pip install "./dvc[tests]" "./dvc-gs[tests]"
    - uses: actions/cache/restore@v4
      with:
        path: dvc-bench/.dvc/cache
        key: ${{ env.DATASET }}
    # GCP authentication happens on scheduled runs only.
    # NOTE(review): the ref after `google-github-actions/auth@` and the
    # service-account address were both replaced by "[email protected]"
    # (email obfuscation) in this copy of the file; restore the real values
    # from the upstream workflow.
    - id: 'auth'
      if: ${{ github.event_name == 'schedule' }}
      name: 'Authenticate to GCP'
      uses: 'google-github-actions/[email protected]'
      with:
        workload_identity_provider: 'projects/385088528371/locations/global/workloadIdentityPools/github/providers/github'
        service_account: '[email protected]'
    # Exported through GITHUB_ENV so the later benchmark step sees it.
    - name: configure real GS DVC env
      if: ${{ github.event_name == 'schedule' }}
      run: |
        echo "DVC_TEST_GS_BUCKET=dvc-bench" >> "$GITHUB_ENV"
    - name: run benchmarks
      shell: bash
      working-directory: dvc-gs/
      # Folded scalar: the lines below are joined into one pytest command.
      # The test path is passed exactly once, matching the s3 and azure jobs;
      # a second copy of `${{ matrix.test.path }}` used to follow `-W ignore`
      # and handed pytest the same target twice.
      run: >
        pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
        ${{ matrix.test.path }}
        --dvc-revs ${CLOUD_REVS}
        --dataset ${DATASET}
        --dvc-install-deps gs
        --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc
        -W ignore
        ${{ env.CPROFILE_ARGS }}
    - name: upload raw results
      uses: actions/upload-artifact@v4
      with:
        include-hidden-files: true
        name: .benchmarks-${{ matrix.test.name }}
        path: dvc-gs/.benchmarks
    - if: ${{ env.CPROFILE_ARGS }}
      name: upload profiling results
      uses: actions/upload-artifact@v4
      with:
        name: prof-${{ matrix.test.name }}
        path: dvc-gs/prof
# notify (child of the top-level `jobs:` key): posts to Slack when a needed
# job failed on main; manually dispatched runs never notify.
notify:
  needs:
    - build
    - build_s3
    - build_azure
    - build_gs
  if: github.event_name != 'workflow_dispatch' && github.ref == 'refs/heads/main' && failure()
  runs-on: ubuntu-latest
  steps:
    # NOTE(review): the ref after `rtCamp/action-slack-notify@` was replaced
    # by "[email protected]" (email obfuscation) in this copy of the file;
    # restore the pinned version from the upstream workflow.
    - name: Slack Notification
      uses: rtCamp/[email protected]
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        SLACK_USERNAME: dvc-bench
        SLACK_TITLE: ':dvc:'
        SLACK_MESSAGE: "Benchmarks failed on main :confused_dog:"
        SLACK_COLOR: ${{ job.status }}
# compare (child of the top-level `jobs:` key): merges the per-test artifacts,
# renders the HTML report, archives it to S3 for scheduled/manual runs,
# comments on same-repo pull requests, and uploads the Pages artifact.
compare:
  name: join results and publish
  needs:
    - build
    - build_s3
    - build_azure
    - build_gs
  # Runs even when the cloud jobs were skipped, but not if any needed job failed.
  if: always() && !contains(needs.*.result, 'failure')
  runs-on: ubuntu-latest
  environment: aws
  permissions:
    contents: write
    id-token: write
    pages: write
    pull-requests: write
  steps:
    # Merge the per-test profiling artifacts only when profiling was requested.
    - if: env.CPROFILE_ARGS
      uses: actions/upload-artifact/merge@v4
      with:
        name: all-prof
        pattern: prof-*
        delete-merged: true
    - uses: actions/upload-artifact/merge@v4
      with:
        name: all-benchmarks
        pattern: .benchmarks-*
        include-hidden-files: true
        delete-merged: true
    - uses: iterative/setup-cml@v3
    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'
    - uses: actions/checkout@v4
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - uses: astral-sh/setup-uv@v4
      with:
        enable-cache: true
        cache-dependency-glob: |
          requirements.txt
          dvc/pyproject.toml
    - name: install requirements
      run: uv pip install -r requirements.txt "./dvc[testing]"
    - name: download ubuntu results
      uses: actions/download-artifact@v4
      with:
        pattern: all-benchmarks
        path: .benchmarks
    - name: compare results
      shell: bash
      run: ./scripts/ci/gen_html.sh
    # The three steps below run for scheduled and manually dispatched runs only.
    - name: configure AWS credentials
      if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-east-2
        role-to-assume: arn:aws:iam::260760892802:role/dvc-bench
    # Archive this run's report under pages/<event>/<run_id>_<attempt>.html.
    - name: upload to s3
      if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
      run: aws s3 cp html/index.html s3://dvc-bench/pages/${{ github.event_name }}/${{ github.run_id }}_${{ github.run_attempt }}.html
    # Renames the fresh report to latest.html, downloads the first 30 entries
    # of the reverse-sorted listing for each event type, and copies the first
    # of each to <event>/latest.html.
    - name: generate pages
      if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
      run: |
        mv html/index.html html/latest.html
        mkdir html/workflow_dispatch html/schedule
        LATEST_SCHEDULE="$(aws s3 ls s3://dvc-bench/pages/schedule/ --recursive | sort -r | head -n 30 | awk '{print $4}')"
        LATEST_DISPATCH="$(aws s3 ls s3://dvc-bench/pages/workflow_dispatch/ --recursive | sort -r | head -n 30 | awk '{print $4}')"
        echo "$LATEST_SCHEDULE" | xargs -I '{}' aws s3 cp s3://dvc-bench/'{}' html/schedule
        echo "$LATEST_DISPATCH" | xargs -I '{}' aws s3 cp s3://dvc-bench/'{}' html/workflow_dispatch
        cp html/schedule/$(echo "$LATEST_SCHEDULE" | head -1 | xargs basename) html/schedule/latest.html
        cp html/workflow_dispatch/$(echo "$LATEST_DISPATCH" | head -1 | xargs basename) html/workflow_dispatch/latest.html
    # NOTE(review): the ref after `jayanta525/github-pages-directory-listing@`
    # was replaced by "[email protected]" (email obfuscation) in this copy
    # of the file; restore the pinned version from the upstream workflow.
    - name: generate listings
      uses: jayanta525/[email protected]
      with:
        FOLDER: html
    # Comment only on pull requests whose head branch is not in a fork.
    - name: post comment
      if: github.event_name == 'pull_request' && ! github.event.pull_request.head.repo.fork
      env:
        REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        cml comment update --watermark-title='dvc-bench report' report.md
    - name: Setup Pages
      id: pages
      uses: actions/configure-pages@v5
    - name: Upload artifact
      uses: actions/upload-pages-artifact@v3
      with:
        path: ./html
# deploy (child of the top-level `jobs:` key): deploys to GitHub Pages after
# `compare`, for scheduled and manually dispatched runs only.
deploy:
  needs: [compare]
  if: always() && !contains(needs.*.result, 'failure') && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
  runs-on: ubuntu-latest
  permissions:
    id-token: write
    pages: write
  environment:
    name: github-pages
    # Shown on the run page; taken from the deployment step's output.
    url: ${{ steps.deployment.outputs.page_url }}
  steps:
    - id: deployment
      name: Deploy to GitHub Pages
      uses: actions/deploy-pages@v4