# chronos-forecasting/scripts/evaluation/agg-relative-score.py

import pandas as pd
import typer
from scipy.stats import gmean
from pathlib import Path
# Typer application object; the CLI command defined in this file is registered
# on it. Typer's pretty exception rendering is switched off.
app = typer.Typer(pretty_exceptions_enable=False)
# Default location of the results CSVs: a "results" directory next to this script.
DEFAULT_RESULTS_DIR = Path(__file__).parent / "results"
def agg_relative_score(model_csv: Path, baseline_csv: Path):
    """
    Aggregate a model's scores relative to a baseline's scores.

    Both CSVs are indexed by their "dataset" column and their "model" column
    is discarded. The remaining columns of the model frame are divided by
    those of the baseline frame (aligned on dataset and column labels), and
    the geometric mean of the ratios is taken per column.

    Parameters
    ----------
    model_csv : Path
        CSV file holding the scores of the model being evaluated.
    baseline_csv : Path
        CSV file holding the scores of the baseline.

    Returns
    -------
    pandas.Series
        One geometric-mean relative score per remaining column.
    """
    # Load both files before dropping anything, model first, baseline second.
    frames = [
        pd.read_csv(csv_path).set_index("dataset")
        for csv_path in (model_csv, baseline_csv)
    ]
    model_metrics, baseline_metrics = (
        frame.drop(columns="model") for frame in frames
    )
    ratios = model_metrics.div(baseline_metrics)
    return ratios.agg(gmean)
@app.command()
def main(
    model_name: str,
    baseline_name: str = "seasonal-naive",
    results_dir: Path = DEFAULT_RESULTS_DIR,
):
    """
    Compute the aggregated relative score as reported in the Chronos paper.

    Results will be saved to {results_dir}/{model_name}-agg-rel-scores.csv

    Parameters
    ----------
    model_name : str
        Name of the model used in the CSV files. The in-domain and zero-shot CSVs
        are expected to be named {model_name}-in-domain.csv and {model_name}-zero-shot.csv.
    baseline_name : str, optional, default = seasonal-naive
        Name of the baseline the model scores are divided by. The baseline CSVs
        are expected to be named {baseline_name}-in-domain.csv and
        {baseline_name}-zero-shot.csv inside results_dir.
    results_dir : Path, optional, default = results/
        Directory where results CSVs generated by evaluate.py are stored
    """
    # Both benchmarks are processed the same way; only the file suffix differs.
    scores_by_benchmark = {}
    for benchmark in ("in-domain", "zero-shot"):
        scores = agg_relative_score(
            results_dir / f"{model_name}-{benchmark}.csv",
            results_dir / f"{baseline_name}-{benchmark}.csv",
        )
        # These names become the "value" and "metric" column headers in the CSV.
        scores.name = "value"
        scores.index.name = "metric"
        scores_by_benchmark[benchmark] = scores

    # The dict keys form the outer index level, labelled "benchmark".
    agg_score_df = pd.concat(scores_by_benchmark, names=["benchmark"])
    agg_score_df.to_csv(results_dir / f"{model_name}-agg-rel-scores.csv")
# Run the Typer CLI only when this file is executed as a script.
if __name__ == "__main__":
    app()