From eac768ce28a94a962894693289d7b7507f14e4fc Mon Sep 17 00:00:00 2001 From: Abdul Fatir Date: Mon, 2 Dec 2024 10:05:57 +0100 Subject: [PATCH] Add workflow to run evaluation on a subset of datasets (#222) *Issue #, if available:* *Description of changes:* This PR adds a workflow that runs the evaluation script on `chronos-bolt-small` for a subset of datasets specified in `ci/evaluate/backtest_config.yaml`. After evaluation, a comment with the metrics is posted on the PR. The workflow only runs if the `run-eval` label is present on the PR. The end-to-end process has been split into two workflows: - `eval-model.yml`: only has read access (can be run from forks). This evaluates the model and uploads the metrics CSV file as a GitHub artifact. - `eval-pr-comment.yml`: has read and write access (it only runs once it is part of the `main` branch). This is triggered when the first workflow finishes, downloads the CSV produced by the eval workflow, and posts the comment. According to [this post](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/), splitting into two workflows as done here is the recommended and secure way to do this. **NOTE**: The first step works as expected, but we can only test the second step after merging because that workflow needs to be part of the `main` branch for this to work. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
---------

Co-authored-by: Abdul Fatir Ansari
---
 .github/workflows/eval-model.yml      | 50 ++++++++++++++++++
 .github/workflows/eval-pr-comment.yml | 69 +++++++++++++++++++++++++
 ci/evaluate/backtest_config.yaml      | 37 ++++++++++++++
 3 files changed, 156 insertions(+)
 create mode 100644 .github/workflows/eval-model.yml
 create mode 100644 .github/workflows/eval-pr-comment.yml
 create mode 100644 ci/evaluate/backtest_config.yaml

diff --git a/.github/workflows/eval-model.yml b/.github/workflows/eval-model.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/eval-model.yml
@@ -0,0 +1,50 @@
+# Evaluates Chronos-Bolt (Small) model on selected datasets
+name: Evaluate
+
+on:
+  pull_request:
+    branches: ["main"]  # Run on PRs to main branch
+    # 'labeled' starts a run when the 'run-eval' label is added to an open PR
+    types: [opened, reopened, synchronize, labeled]
+
+# Keep the GITHUB_TOKEN read-only for every PR, not only those from forks
+permissions:
+  contents: read
+
+jobs:
+  evaluate-and-post:
+    # Only run if the 'run-eval' label is present on the PR
+    if: contains(github.event.pull_request.labels.*.name, 'run-eval')
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install Dependencies
+        run: pip install ".[evaluation]" -f https://download.pytorch.org/whl/cpu/torch_stable.html
+
+      - name: Run Eval Script
+        run: python scripts/evaluation/evaluate.py ci/evaluate/backtest_config.yaml eval-ci-metrics.csv --chronos-model-id=amazon/chronos-bolt-small --device=cpu --torch-dtype=float32
+
+      # A workflow_run event has no pull_request payload, so the PR number is
+      # shipped to the commenting workflow inside the artifact.
+      - name: Save PR Number
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: echo "$PR_NUMBER" > pr-number.txt
+
+      - name: Upload CSV
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-metrics
+          path: |
+            eval-ci-metrics.csv
+            pr-number.txt
+          retention-days: 1
+          overwrite: true
diff --git a/.github/workflows/eval-pr-comment.yml b/.github/workflows/eval-pr-comment.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/eval-pr-comment.yml
@@ -0,0 +1,69 @@
+# Post evaluation results from the "Evaluate" workflow as a PR comment
+name: Post Eval Metrics
+
+on:
+  # Runs with read & write privileges for the GITHUB_TOKEN
+  workflow_run:
+    workflows: ["Evaluate"]
+    types:
+      - completed
+
+jobs:
+  comment-eval-results:
+    if: >
+      github.event.workflow_run.event == 'pull_request' &&
+      github.event.workflow_run.conclusion == 'success'
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read  # for downloading artifacts
+      pull-requests: write  # for posting PR comment
+
+    steps:
+      - name: Download Eval Metrics
+        uses: actions/download-artifact@v4
+        with:
+          name: eval-metrics
+          path: eval-metrics-artifact/
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          run-id: ${{ github.event.workflow_run.id }}
+
+      - name: Display structure of downloaded files
+        run: ls -R
+
+      # github.event.pull_request is not populated for workflow_run events, so
+      # the PR number is read from the artifact. The artifact may come from a
+      # fork, hence only digits are kept.
+      - name: Read PR Number
+        id: pr
+        run: |
+          pr_number="$(tr -cd '0-9' < eval-metrics-artifact/pr-number.txt)"
+          if [ -z "$pr_number" ]; then
+            echo "::error::No PR number found in the eval-metrics artifact"
+            exit 1
+          fi
+          echo "number=$pr_number" >> "$GITHUB_OUTPUT"
+
+      - name: Read CSV
+        id: csv
+        uses: juliangruber/read-file-action@v1
+        with:
+          path: eval-metrics-artifact/eval-ci-metrics.csv
+
+      # NOTE(review): @master is a mutable ref in a job with write access;
+      # consider pinning to a release tag or commit SHA.
+      - name: Create Markdown Table
+        uses: petems/csv-to-md-table-action@master
+        id: csv-table-output
+        with:
+          csvinput: ${{ steps.csv.outputs.content }}
+
+      - name: Post Table as a Comment
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          repository: ${{ github.repository }}
+          issue-number: ${{ steps.pr.outputs.number }}
+          body: |
+            ### Evaluation Metrics
+            ${{ steps.csv-table-output.outputs.markdown-table }}
+          reactions: rocket
diff --git a/ci/evaluate/backtest_config.yaml b/ci/evaluate/backtest_config.yaml
new file mode 100644
--- /dev/null
+++ b/ci/evaluate/backtest_config.yaml
@@ -0,0 +1,37 @@
+# From In-domain
+- name: taxi_30min  # 30 min
+  hf_repo: autogluon/chronos_datasets
+  offset: -48
+  prediction_length: 48
+  num_rolls: 1
+# From Zero-shot
+- name: ETTh  # Hourly
+  hf_repo: autogluon/chronos_datasets_extra
+  offset: -24
+  prediction_length: 24
+  num_rolls: 1
+- name: monash_covid_deaths  # Daily
+  hf_repo: autogluon/chronos_datasets
+  offset: -30
+  prediction_length: 30
+  num_rolls: 1
+- name: monash_nn5_weekly  # Weekly
+  hf_repo: autogluon/chronos_datasets
+  offset: -8
+  prediction_length: 8
+  num_rolls: 1
+- name: monash_fred_md  # Monthly
+  hf_repo: autogluon/chronos_datasets
+  offset: -12
+  prediction_length: 12
+  num_rolls: 1
+- name: monash_m3_quarterly  # Quarterly
+  hf_repo: autogluon/chronos_datasets
+  offset: -8
+  prediction_length: 8
+  num_rolls: 1
+- name: monash_tourism_yearly  # Yearly
+  hf_repo: autogluon/chronos_datasets
+  offset: -4
+  prediction_length: 4
+  num_rolls: 1