amazon-science · abdulfatir · Dec 2, 2024 · Nov 29, 2024 · Nov 29, 2024 · Nov 29, 2024
diff --git a/.github/workflows/eval-model.yml b/.github/workflows/eval-model.yml
@@ -0,0 +1,47 @@
+# Evaluates Chronos-Bolt (Small) model on selected datasets
+name: Evaluate
+
+on:
+  # Runs only with read privileges for the GITHUB_TOKEN
+  pull_request:
+    branches: ["main"] # Run on PRs to main branch
+    # `labeled` is not among the default activity types; list it so that
+    # adding the 'run-eval' label starts a run without needing a new push
+    types: [opened, synchronize, reopened, labeled]
+
+# Least privilege: this workflow installs and runs code from the PR
+permissions:
+  contents: read
+
+jobs:
+  evaluate-and-post:
+    if: contains(github.event.pull_request.labels.*.name, 'run-eval')  # Only run when the PR has the 'run-eval' label
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install Dependencies
+        run: pip install ".[evaluation]" -f https://download.pytorch.org/whl/cpu/torch_stable.html
+
+      - name: Run Eval Script
+        run: >-
+          python scripts/evaluation/evaluate.py
+          ci/evaluate/backtest_config.yaml eval-ci-metrics.csv
+          --chronos-model-id=amazon/chronos-bolt-small
+          --device=cpu --torch-dtype=float32
+
+      - name: Upload CSV
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-metrics
+          path: eval-ci-metrics.csv
+          retention-days: 1
+          overwrite: true
+          if-no-files-found: error
diff --git a/.github/workflows/eval-pr-comment.yml b/.github/workflows/eval-pr-comment.yml
@@ -0,0 +1,61 @@
+# Post evaluation results from the "Evaluate" workflow as a PR comment
+name: Post Eval Metrics
+
+on:
+  # Runs with read & write privileges for the GITHUB_TOKEN
+  workflow_run:
+    workflows: ["Evaluate"]
+    types:
+      - completed
+
+jobs:
+  comment-eval-results:
+    # Only react to successful "Evaluate" runs that were started by a pull request
+    if: >
+      github.event.workflow_run.event == 'pull_request' &&
+      github.event.workflow_run.conclusion == 'success'
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read # for downloading artifacts
+      pull-requests: write # for posting PR comment
+
+    steps:
+      - name: Download Eval Metrics
+        uses: actions/download-artifact@v4
+        with:
+          name: eval-metrics
+          path: eval-metrics-artifact/
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          # Fetch the artifact from the triggering "Evaluate" run, not from this run
+          run-id: ${{ github.event.workflow_run.id }}
+
+      - name: Display structure of downloaded files
+        run: ls -R
+
+      - name: Read CSV
+        id: csv
+        uses: juliangruber/read-file-action@v1
+        with:
+          path: eval-metrics-artifact/eval-ci-metrics.csv
+
+      - name: Create Markdown Table
+        # NOTE(review): `@master` is a mutable ref and this job holds a write token;
+        # consider pinning to a release tag or commit SHA
+        uses: petems/csv-to-md-table-action@master
+        id: csv-table-output
+        with:
+          csvinput: ${{ steps.csv.outputs.content }}
+
+      - name: Post Table as a Comment
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          repository: ${{ github.repository }}
+          # `workflow_run` payloads have no top-level `pull_request`; the PR is
+          # listed under `workflow_run.pull_requests`.
+          # NOTE(review): that list can be empty for PRs opened from forks — confirm
+          issue-number: ${{ github.event.workflow_run.pull_requests[0].number }}
+          body: |
+            ### Evaluation Metrics
+            ${{steps.csv-table-output.outputs.markdown-table}}
+          reactions: rocket
diff --git a/ci/evaluate/backtest_config.yaml b/ci/evaluate/backtest_config.yaml
@@ -0,0 +1,47 @@
+# Backtest datasets for the CI evaluation run.
+# Note: in every entry below, offset equals -prediction_length.
+
+# Taken from the in-domain datasets
+# 30-minute frequency
+- name: taxi_30min
+  hf_repo: autogluon/chronos_datasets
+  prediction_length: 48
+  offset: -48
+  num_rolls: 1
+# Taken from the zero-shot datasets
+# Hourly frequency
+- name: ETTh
+  hf_repo: autogluon/chronos_datasets_extra
+  prediction_length: 24
+  offset: -24
+  num_rolls: 1
+# Daily frequency
+- name: monash_covid_deaths
+  hf_repo: autogluon/chronos_datasets
+  prediction_length: 30
+  offset: -30
+  num_rolls: 1
+# Weekly frequency
+- name: monash_nn5_weekly
+  hf_repo: autogluon/chronos_datasets
+  prediction_length: 8
+  offset: -8
+  num_rolls: 1
+# Monthly frequency
+- name: monash_fred_md
+  hf_repo: autogluon/chronos_datasets
+  prediction_length: 12
+  offset: -12
+  num_rolls: 1
+# Quarterly frequency
+- name: monash_m3_quarterly
+  hf_repo: autogluon/chronos_datasets
+  prediction_length: 8
+  offset: -8
+  num_rolls: 1
+# Yearly frequency
+- name: monash_tourism_yearly
+  hf_repo: autogluon/chronos_datasets
+  prediction_length: 4
+  offset: -4
+  num_rolls: 1