-
Notifications
You must be signed in to change notification settings - Fork 306
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add workflow to run evaluation on a subset of datasets (#222)
*Issue #, if available:* *Description of changes:* This PR adds a workflow that will run the evaluation script on `chronos-bolt-small` for a subset of datasets specified in `ci/evaluate/backtest_config.yaml`. After evaluation, a comment will be made on the PR. The workflow will only run if the `run-eval` label is present on a PR. The end-to-end workflow has been split into two workflows: - `eval-model.yml`: only has read access (can be run from forks). This will evaluate the model and upload the metrics CSV file as a GitHub artifact. - `eval-pr-comment.yml`: has read and write access (can only be run when in the `main` branch). This will be triggered when the first job finishes, will download the CSV from the eval job and make the comment. According to [this post](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/), splitting into two jobs as done here is the recommended and secure way to do this. **NOTE**: The first step works as expected, but we can only test the second step after merging because this workflow needs to be part of the `main` branch for this to work. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --------- Co-authored-by: Abdul Fatir Ansari <[email protected]>
- Loading branch information
1 parent
e3bbda7
commit eac768c
Showing
3 changed files
with
126 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Evaluates Chronos-Bolt (Small) model on selected datasets
name: Evaluate

on:
  # Runs only with read privileges for the GITHUB_TOKEN
  pull_request:
    branches: ["main"]  # Run on PRs to main branch
    # `labeled` is not among the default pull_request activity types
    # (opened, synchronize, reopened). It is listed explicitly so that adding
    # the 'run-eval' label starts a run immediately instead of waiting for the
    # next push to the PR.
    types:
      - opened
      - reopened
      - synchronize
      - labeled

# Make the read-only token explicit rather than relying on repository defaults
# (PRs opened from branches of this repository may otherwise get a wider token).
permissions:
  contents: read

jobs:
  evaluate-and-post:
    # Only run when the PR carries the 'run-eval' label
    if: contains(github.event.pull_request.labels.*.name, 'run-eval')
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # CPU-only PyTorch wheels are pulled from the PyTorch wheel index
      - name: Install Dependencies
        run: pip install ".[evaluation]" -f https://download.pytorch.org/whl/cpu/torch_stable.html

      # Writes the metrics to eval-ci-metrics.csv in the workspace root
      - name: Run Eval Script
        run: >-
          python scripts/evaluation/evaluate.py
          ci/evaluate/backtest_config.yaml
          eval-ci-metrics.csv
          --chronos-model-id=amazon/chronos-bolt-small
          --device=cpu
          --torch-dtype=float32

      # The "Post Eval Metrics" workflow downloads this artifact by name
      - name: Upload CSV
        uses: actions/upload-artifact@v4
        with:
          name: eval-metrics
          path: eval-ci-metrics.csv
          retention-days: 1
          overwrite: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Post evaluation results from the "Evaluate" workflow as a PR comment
name: Post Eval Metrics

on:
  # Runs with read & write privileges for the GITHUB_TOKEN
  workflow_run:
    workflows: ["Evaluate"]
    types:
      - completed

jobs:
  comment-eval-results:
    # Only react to successful "Evaluate" runs that were started by a pull request
    if: >
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-latest
    permissions:
      actions: read  # for downloading artifacts
      pull-requests: write  # for posting PR comment

    steps:
      # run-id points the download at the triggering "Evaluate" run; without it
      # the action would look for artifacts of this run only.
      - name: Download Eval Metrics
        uses: actions/download-artifact@v4
        with:
          name: eval-metrics
          path: eval-metrics-artifact/
          github-token: ${{ secrets.GITHUB_TOKEN }}
          run-id: ${{ github.event.workflow_run.id }}

      - name: Display structure of downloaded files
        run: ls -R

      - name: Read CSV
        id: csv
        uses: juliangruber/read-file-action@v1
        with:
          path: eval-metrics-artifact/eval-ci-metrics.csv

      # NOTE(review): `@master` is a mutable ref and this job holds a token with
      # pull-requests write access; consider pinning to a release tag or commit SHA.
      - name: Create Markdown Table
        uses: petems/csv-to-md-table-action@master
        id: csv-table-output
        with:
          csvinput: ${{ steps.csv.outputs.content }}

      - name: Post Table as a Comment
        uses: peter-evans/create-or-update-comment@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          repository: ${{ github.repository }}
          # A workflow_run payload has no top-level `pull_request` object; the PR
          # number is taken from the triggering run's `pull_requests` list.
          # NOTE(review): that list can be empty for PRs opened from forks;
          # confirm, and if so pass the PR number through the artifact instead.
          issue-number: ${{ github.event.workflow_run.pull_requests[0].number }}
          body: |
            ### Evaluation Metrics
            ${{steps.csv-table-output.outputs.markdown-table}}
          reactions: rocket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
---
# Backtest dataset configurations.
# Every entry uses a single rolling window (num_rolls: 1) and an offset equal
# to the negated prediction_length.

# ---- From In-domain ----

# 30 min
- name: taxi_30min
  hf_repo: autogluon/chronos_datasets
  offset: -48
  prediction_length: 48
  num_rolls: 1

# ---- From Zero-shot ----

# Hourly
- name: ETTh
  hf_repo: autogluon/chronos_datasets_extra
  offset: -24
  prediction_length: 24
  num_rolls: 1

# Daily
- name: monash_covid_deaths
  hf_repo: autogluon/chronos_datasets
  offset: -30
  prediction_length: 30
  num_rolls: 1

# Weekly
- name: monash_nn5_weekly
  hf_repo: autogluon/chronos_datasets
  offset: -8
  prediction_length: 8
  num_rolls: 1

# Monthly
- name: monash_fred_md
  hf_repo: autogluon/chronos_datasets
  offset: -12
  prediction_length: 12
  num_rolls: 1

# Quarterly
- name: monash_m3_quarterly
  hf_repo: autogluon/chronos_datasets
  offset: -8
  prediction_length: 8
  num_rolls: 1

# Yearly
- name: monash_tourism_yearly
  hf_repo: autogluon/chronos_datasets
  offset: -4
  prediction_length: 4
  num_rolls: 1