"""Aggregate a model's benchmark results relative to a baseline model."""

from pathlib import Path

import pandas as pd
import typer
from scipy.stats import gmean

app = typer.Typer(pretty_exceptions_enable=False)
DEFAULT_RESULTS_DIR = Path(__file__).parent / "results"


def agg_relative_score(model_csv: Path, baseline_csv: Path) -> pd.Series:
    """
    Compute the geometric mean of per-dataset relative scores.

    Both CSVs must contain a ``dataset`` column and a ``model`` column; every
    remaining column is treated as a metric. Each metric of the model is
    divided by the baseline's value for the same dataset, and the ratios are
    aggregated across datasets with the geometric mean.

    Parameters
    ----------
    model_csv : Path
        CSV file with the results of the model being scored.
    baseline_csv : Path
        CSV file with the results of the baseline model.

    Returns
    -------
    pd.Series
        One aggregated relative score per metric column.

    Raises
    ------
    ValueError
        If the two CSVs do not cover the same set of datasets.
    """
    model_df = pd.read_csv(model_csv).set_index("dataset")
    baseline_df = pd.read_csv(baseline_csv).set_index("dataset")

    # The division below aligns rows on the dataset index. A dataset present
    # in only one file would yield a NaN row, which gmean propagates, silently
    # turning every aggregated score into NaN. Fail loudly instead.
    mismatched = model_df.index.symmetric_difference(baseline_df.index)
    if not mismatched.empty:
        raise ValueError(
            f"{model_csv} and {baseline_csv} do not cover the same datasets; "
            f"present in only one of them: {mismatched.tolist()}"
        )

    model_metrics = model_df.drop("model", axis="columns")
    baseline_metrics = baseline_df.drop("model", axis="columns")
    relative_score = model_metrics / baseline_metrics
    return relative_score.agg(gmean)


@app.command()
def main(
    model_name: str,
    baseline_name: str = "seasonal-naive",
    results_dir: Path = DEFAULT_RESULTS_DIR,
):
    """
    Compute the aggregated relative score as reported in the Chronos paper.
    Results will be saved to {results_dir}/{model_name}-agg-rel-scores.csv

    Parameters
    ----------
    model_name : str
        Name of the model used in the CSV files. The in-domain and zero-shot CSVs
        are expected to be named {model_name}-in-domain.csv and {model_name}-zero-shot.csv.
    baseline_name : str, optional, default = seasonal-naive
        Name of the baseline model used in the CSV files. The baseline CSVs are
        expected to be named {baseline_name}-in-domain.csv and
        {baseline_name}-zero-shot.csv.
    results_dir : Path, optional, default = results/
        Directory where results CSVs generated by evaluate.py are stored
    """
    scores_by_benchmark = {}
    for benchmark in ("in-domain", "zero-shot"):
        scores = agg_relative_score(
            results_dir / f"{model_name}-{benchmark}.csv",
            results_dir / f"{baseline_name}-{benchmark}.csv",
        )
        # These names become the "value" and "metric" column headers of the
        # output CSV.
        scores.name = "value"
        scores.index.name = "metric"
        scores_by_benchmark[benchmark] = scores

    # The dict keys form the outer index level, labelled "benchmark".
    agg_score_df = pd.concat(scores_by_benchmark, names=["benchmark"])
    agg_score_df.to_csv(results_dir / f"{model_name}-agg-rel-scores.csv")


if __name__ == "__main__":
    app()