mirror of
https://github.com/amazon-science/chronos-forecasting
synced 2026-05-24 10:08:33 +00:00
Add workflow to run evaluation on a subset of datasets (#222)
*Issue #, if available:* *Description of changes:* This PR adds a workflow that will run the evaluation script on `chronos-bolt-small` for a subset of datasets specified in `ci/evaluate/backtest_config.yaml`. After evaluation, a comment will be made on the PR. The workflow will only run if the `run-eval` label is present on a PR. The end-to-end workflow has been split into two workflows: - `eval-model.yml`: only has read access (can be run from forks). This will evaluate the model and upload the metrics CSV file as a GitHub artifact. - `eval-pr-comment.yml`: has read and write access (can only be run when in the `main` branch). This will be triggered when the first workflow finishes, will download the CSV from the eval job and make the comment. According to [this post](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/), splitting into two workflows as done here is the recommended and secure way to do this. **NOTE**: The first step works as expected, but we can only test the second step after merging because this workflow needs to be part of the `main` branch for this to work. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --------- Co-authored-by: Abdul Fatir Ansari <ansarnd@amazon.de>
This commit is contained in:
parent
e3bbda7207
commit
eac768ce28
3 changed files with 126 additions and 0 deletions
35
.github/workflows/eval-model.yml
vendored
Normal file
35
.github/workflows/eval-model.yml
vendored
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
# Evaluates Chronos-Bolt (Small) model on selected datasets.
#
# The metrics CSV is uploaded as the `eval-metrics` artifact so that a
# separate, write-privileged workflow can post it to the PR.
# NOTE: the workflow name below is referenced by name from the `workflow_run`
# trigger of the commenting workflow; renaming it breaks that link.
name: Evaluate

on:
  # Runs only with read privileges for the GITHUB_TOKEN
  pull_request:
    branches: ["main"] # Run on PRs to main branch
    types:
      - opened
      - reopened
      - synchronize
      # Not part of the default activity types: without it, adding the
      # 'run-eval' label to an existing PR would not start a run.
      - labeled

# Enforce a read-only token even for PRs opened from branches of this repo,
# where the repository default permissions would otherwise apply.
permissions:
  contents: read

jobs:
  evaluate-and-post:
    # Only run if the PR carries the 'run-eval' label
    if: contains(github.event.pull_request.labels.*.name, 'run-eval')
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install Dependencies
        # The extra index (-f) points pip at the CPU-only PyTorch wheels.
        run: pip install ".[evaluation]" -f https://download.pytorch.org/whl/cpu/torch_stable.html

      - name: Run Eval Script
        # Folded scalar: the lines below are joined into a single command.
        run: >-
          python scripts/evaluation/evaluate.py
          ci/evaluate/backtest_config.yaml
          eval-ci-metrics.csv
          --chronos-model-id=amazon/chronos-bolt-small
          --device=cpu
          --torch-dtype=float32

      - name: Upload CSV
        uses: actions/upload-artifact@v4
        with:
          # Artifact name and file name are what the commenting workflow
          # downloads and reads; keep them in sync.
          name: eval-metrics
          path: eval-ci-metrics.csv
          retention-days: 1
          overwrite: true
|
||||
54
.github/workflows/eval-pr-comment.yml
vendored
Normal file
54
.github/workflows/eval-pr-comment.yml
vendored
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# Post evaluation results from the "Evaluate" workflow as a PR comment.
#
# Triggered via `workflow_run`, so it always executes the version of this file
# on the default branch and never checks out or runs code from the PR.
name: Post Eval Metrics

on:
  # Runs with read & write privileges for the GITHUB_TOKEN
  workflow_run:
    workflows: ["Evaluate"]
    types:
      - completed

jobs:
  comment-eval-results:
    # Only comment for successful runs that were started by a pull request.
    if: >
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-latest
    permissions:
      actions: read # for downloading artifacts
      pull-requests: write # for posting PR comment

    steps:
      - name: Download Eval Metrics
        uses: actions/download-artifact@v4
        with:
          name: eval-metrics
          path: eval-metrics-artifact/
          # A token and run-id are required to fetch an artifact that was
          # produced by a different workflow run.
          github-token: ${{ secrets.GITHUB_TOKEN }}
          run-id: ${{ github.event.workflow_run.id }}

      - name: Display structure of downloaded files
        run: ls -R

      - name: Read CSV
        id: csv
        uses: juliangruber/read-file-action@v1
        with:
          path: eval-metrics-artifact/eval-ci-metrics.csv

      - name: Create Markdown Table
        # NOTE(review): this third-party action is pinned to a mutable branch
        # while the job holds a write-scoped token; consider pinning to a
        # release tag or commit SHA.
        uses: petems/csv-to-md-table-action@master
        id: csv-table-output
        with:
          csvinput: ${{ steps.csv.outputs.content }}

      - name: Post Table as a Comment
        uses: peter-evans/create-or-update-comment@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          repository: ${{ github.repository }}
          # `github.event.pull_request` does not exist on `workflow_run`
          # events; the PR is exposed through the triggering run instead.
          # NOTE(review): this list is reportedly empty for PRs opened from
          # forks - confirm whether fork PRs need the number passed via an
          # artifact instead.
          issue-number: ${{ github.event.workflow_run.pull_requests[0].number }}
          body: |
            ### Evaluation Metrics
            ${{steps.csv-table-output.outputs.markdown-table}}
          reactions: rocket
|
||||
37
ci/evaluate/backtest_config.yaml
Normal file
37
ci/evaluate/backtest_config.yaml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Datasets evaluated by the CI evaluation workflow.
# In every entry below, `offset` is the negative of `prediction_length`
# and a single roll is requested.

# --- From In-domain ---

# 30 min
- name: taxi_30min
  hf_repo: autogluon/chronos_datasets
  offset: -48
  prediction_length: 48
  num_rolls: 1

# --- From Zero-shot ---

# Hourly
- name: ETTh
  hf_repo: autogluon/chronos_datasets_extra
  offset: -24
  prediction_length: 24
  num_rolls: 1

# Daily
- name: monash_covid_deaths
  hf_repo: autogluon/chronos_datasets
  offset: -30
  prediction_length: 30
  num_rolls: 1

# Weekly
- name: monash_nn5_weekly
  hf_repo: autogluon/chronos_datasets
  offset: -8
  prediction_length: 8
  num_rolls: 1

# Monthly
- name: monash_fred_md
  hf_repo: autogluon/chronos_datasets
  offset: -12
  prediction_length: 12
  num_rolls: 1

# Quarterly
- name: monash_m3_quarterly
  hf_repo: autogluon/chronos_datasets
  offset: -8
  prediction_length: 8
  num_rolls: 1

# Yearly
- name: monash_tourism_yearly
  hf_repo: autogluon/chronos_datasets
  offset: -4
  prediction_length: 4
  num_rolls: 1
|
||||
Loading…
Reference in a new issue