chronos-forecasting/scripts/evaluation/agg-relative-score.py

import pandas as pd
import typer
from scipy.stats import gmean
from pathlib import Path

# Typer application object; the `main` function in this file is registered on
# it as a command. `pretty_exceptions_enable=False` switches off Typer's
# pretty exception rendering.
app = typer.Typer(pretty_exceptions_enable=False)
# Default results directory: a "results" folder located next to this script.
DEFAULT_RESULTS_DIR = Path(__file__).parent / "results"


def agg_relative_score(model_csv: Path, baseline_csv: Path):
    """
    Aggregate a model's scores relative to a baseline.

    Both CSVs are loaded and indexed by their "dataset" column, the "model"
    column is dropped, and the model scores are divided element-wise by the
    baseline scores (aligned on dataset and column labels). The geometric
    mean of the resulting ratios is then taken over datasets.

    Parameters
    ----------
    model_csv : Path
        CSV with the model's scores; must contain "dataset" and "model" columns.
    baseline_csv : Path
        CSV with the baseline's scores, laid out like ``model_csv``.

    Returns
    -------
    pandas.Series
        One geometric-mean relative score per remaining column.
    """
    # Generator unpacking loads the model file first and the baseline second.
    model_table, baseline_table = (
        pd.read_csv(csv_path).set_index("dataset")
        for csv_path in (model_csv, baseline_csv)
    )
    numerator = model_table.drop(columns="model")
    denominator = baseline_table.drop(columns="model")
    ratios = numerator.div(denominator)
    return ratios.agg(gmean)


@app.command()
def main(
    model_name: str,
    baseline_name: str = "seasonal-naive",
    results_dir: Path = DEFAULT_RESULTS_DIR,
):
    """
    Compute the aggregated relative score as reported in the Chronos paper.
    Results will be saved to {results_dir}/{model_name}-agg-rel-scores.csv

    Parameters
    ----------
    model_name : str
        Name of the model used in the CSV files. The in-domain and zero-shot CSVs
        are expected to be named {model_name}-in-domain.csv and {model_name}-zero-shot.csv.
    baseline_name : str, optional, default = seasonal-naive
        Name of the baseline whose scores are used as the denominator. The baseline
        CSVs are expected to be named {baseline_name}-in-domain.csv and
        {baseline_name}-zero-shot.csv.
    results_dir : Path, optional, default = results/
        Directory where results CSVs generated by evaluate.py are stored
    """

    # Both benchmarks go through the same steps; the dict keeps insertion
    # order, so the output lists "in-domain" first and "zero-shot" second.
    scores_by_benchmark = {}
    for benchmark in ("in-domain", "zero-shot"):
        benchmark_scores = agg_relative_score(
            results_dir / f"{model_name}-{benchmark}.csv",
            results_dir / f"{baseline_name}-{benchmark}.csv",
        )
        # These names become the value column and inner index header in the CSV.
        benchmark_scores.name = "value"
        benchmark_scores.index.name = "metric"
        scores_by_benchmark[benchmark] = benchmark_scores

    # The dict keys become the outer "benchmark" index level.
    agg_score_df = pd.concat(scores_by_benchmark, names=["benchmark"])
    # Build the output path with pathlib, matching how the inputs are located.
    agg_score_df.to_csv(results_dir / f"{model_name}-agg-rel-scores.csv")


# Run the Typer CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app()
:zap: Add support for Chronos-Bolt models (#204)

Issue #, if available: N/A

Description of changes: This PR adds support for Chronos-Bolt models.

TODOs:
- [x] Update evaluation script
- [x] Fix and add tests for Bolt
- [x] Update docstrings
- [x] Update README example and mention Chronos-Bolt
- [x] Update results bar plot in README
- [x] Add versions for libraries in `pyproject.toml`
- [x] Check that the training and eval scripts work
- [x] Change `autogluon` -> `amazon` in model names

Post Merge:
- [ ] Update Citation style in README, both Github and HuggingFace repos
- [ ] Remove note about AutoGluon
- [ ] Update READMEs of original Chronos models to refer to Chronos-Bolt

NOTE: To be merged after Chronos-Bolt models are available under the `amazon` namespace on HF.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

---------

Co-authored-by: Abdul Fatir Ansari <ansarnd@amazon.de>
Co-authored-by: Caner Turkmen <turkmen.ac@gmail.com>
Co-authored-by: Lorenzo Stella <stellalo@amazon.com>

2024-11-26 16:47:14 +00:00			`import pandas as pd`
			`import typer`
			`from scipy.stats import gmean`
			`from pathlib import Path`

			`app = typer.Typer(pretty_exceptions_enable=False)`
			`DEFAULT_RESULTS_DIR = Path(__file__).parent / "results"`


			`def agg_relative_score(model_csv: Path, baseline_csv: Path):`
			`model_df = pd.read_csv(model_csv).set_index("dataset")`
			`baseline_df = pd.read_csv(baseline_csv).set_index("dataset")`
			`relative_score = model_df.drop("model", axis="columns") / baseline_df.drop(`
			`"model", axis="columns"`
			`)`
			`return relative_score.agg(gmean)`


			`@app.command()`
			`def main(`
			`model_name: str,`
			`baseline_name: str = "seasonal-naive",`
			`results_dir: Path = DEFAULT_RESULTS_DIR,`
			`):`
			`"""`
			`Compute the aggregated relative score as reported in the Chronos paper.`
			`Results will be saved to {results_dir}/{model_name}-agg-rel-scores.csv`

			`Parameters`
			`----------`
			`model_name : str`
			`Name of the model used in the CSV files. The in-domain and zero-shot CSVs`
			`are expected to be named {model_name}-in-domain.csv and {model_name}-zero-shot.csv.`
			`results_dir : Path, optional, default = results/`
			`Directory where results CSVs generated by evaluate.py are stored`
			`"""`

			`in_domain_agg_score_df = agg_relative_score(`
			`results_dir / f"{model_name}-in-domain.csv",`
			`results_dir / f"{baseline_name}-in-domain.csv",`
			`)`
			`in_domain_agg_score_df.name = "value"`
			`in_domain_agg_score_df.index.name = "metric"`

			`zero_shot_agg_score_df = agg_relative_score(`
			`results_dir / f"{model_name}-zero-shot.csv",`
			`results_dir / f"{baseline_name}-zero-shot.csv",`
			`)`
			`zero_shot_agg_score_df.name = "value"`
			`zero_shot_agg_score_df.index.name = "metric"`

			`agg_score_df = pd.concat(`
			`{"in-domain": in_domain_agg_score_df, "zero-shot": zero_shot_agg_score_df},`
			`names=["benchmark"],`
			`)`
			`agg_score_df.to_csv(f"{results_dir}/{model_name}-agg-rel-scores.csv")`


			`if __name__ == "__main__":`
			`app()`