From c887278706b2e276135c8980a45d2fa6a0d0953d Mon Sep 17 00:00:00 2001 From: Abdul Fatir Date: Fri, 29 Nov 2024 10:33:41 +0100 Subject: [PATCH] Clean up evaluation script (#218) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit *Issue #, if available:* *Description of changes:* This PR cleans up eval script by using `DatetimeIndex.to_period()` instead of the ugly frequency mapping. Not sure what I was doing before. 🫠 By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. Co-authored-by: Abdul Fatir Ansari --- scripts/evaluation/evaluate.py | 160 +-------------------------------- 1 file changed, 3 insertions(+), 157 deletions(-) diff --git a/scripts/evaluation/evaluate.py b/scripts/evaluation/evaluate.py index 8995e31..9c9acff 100644 --- a/scripts/evaluation/evaluate.py +++ b/scripts/evaluation/evaluate.py @@ -24,161 +24,6 @@ from chronos import ( app = typer.Typer(pretty_exceptions_enable=False) -# Taken from pandas._libs.tslibs.dtypes.OFFSET_TO_PERIOD_FREQSTR -offset_alias_to_period_alias = { - "WEEKDAY": "D", - "EOM": "M", - "BME": "M", - "SME": "M", - "BQS": "Q", - "QS": "Q", - "BQE": "Q", - "BQE-DEC": "Q", - "BQE-JAN": "Q", - "BQE-FEB": "Q", - "BQE-MAR": "Q", - "BQE-APR": "Q", - "BQE-MAY": "Q", - "BQE-JUN": "Q", - "BQE-JUL": "Q", - "BQE-AUG": "Q", - "BQE-SEP": "Q", - "BQE-OCT": "Q", - "BQE-NOV": "Q", - "MS": "M", - "D": "D", - "B": "B", - "min": "min", - "s": "s", - "ms": "ms", - "us": "us", - "ns": "ns", - "h": "h", - "QE": "Q", - "QE-DEC": "Q-DEC", - "QE-JAN": "Q-JAN", - "QE-FEB": "Q-FEB", - "QE-MAR": "Q-MAR", - "QE-APR": "Q-APR", - "QE-MAY": "Q-MAY", - "QE-JUN": "Q-JUN", - "QE-JUL": "Q-JUL", - "QE-AUG": "Q-AUG", - "QE-SEP": "Q-SEP", - "QE-OCT": "Q-OCT", - "QE-NOV": "Q-NOV", - "YE": "Y", - "YE-DEC": "Y-DEC", - "YE-JAN": "Y-JAN", - "YE-FEB": "Y-FEB", - "YE-MAR": "Y-MAR", - "YE-APR": "Y-APR", - "YE-MAY": "Y-MAY", - "YE-JUN": "Y-JUN", - 
"YE-JUL": "Y-JUL", - "YE-AUG": "Y-AUG", - "YE-SEP": "Y-SEP", - "YE-OCT": "Y-OCT", - "YE-NOV": "Y-NOV", - "W": "W", - "ME": "M", - "Y": "Y", - "BYE": "Y", - "BYE-DEC": "Y", - "BYE-JAN": "Y", - "BYE-FEB": "Y", - "BYE-MAR": "Y", - "BYE-APR": "Y", - "BYE-MAY": "Y", - "BYE-JUN": "Y", - "BYE-JUL": "Y", - "BYE-AUG": "Y", - "BYE-SEP": "Y", - "BYE-OCT": "Y", - "BYE-NOV": "Y", - "YS": "Y", - "BYS": "Y", - "QS-JAN": "Q", - "QS-FEB": "Q", - "QS-MAR": "Q", - "QS-APR": "Q", - "QS-MAY": "Q", - "QS-JUN": "Q", - "QS-JUL": "Q", - "QS-AUG": "Q", - "QS-SEP": "Q", - "QS-OCT": "Q", - "QS-NOV": "Q", - "QS-DEC": "Q", - "BQS-JAN": "Q", - "BQS-FEB": "Q", - "BQS-MAR": "Q", - "BQS-APR": "Q", - "BQS-MAY": "Q", - "BQS-JUN": "Q", - "BQS-JUL": "Q", - "BQS-AUG": "Q", - "BQS-SEP": "Q", - "BQS-OCT": "Q", - "BQS-NOV": "Q", - "BQS-DEC": "Q", - "YS-JAN": "Y", - "YS-FEB": "Y", - "YS-MAR": "Y", - "YS-APR": "Y", - "YS-MAY": "Y", - "YS-JUN": "Y", - "YS-JUL": "Y", - "YS-AUG": "Y", - "YS-SEP": "Y", - "YS-OCT": "Y", - "YS-NOV": "Y", - "YS-DEC": "Y", - "BYS-JAN": "Y", - "BYS-FEB": "Y", - "BYS-MAR": "Y", - "BYS-APR": "Y", - "BYS-MAY": "Y", - "BYS-JUN": "Y", - "BYS-JUL": "Y", - "BYS-AUG": "Y", - "BYS-SEP": "Y", - "BYS-OCT": "Y", - "BYS-NOV": "Y", - "BYS-DEC": "Y", - "Y-JAN": "Y-JAN", - "Y-FEB": "Y-FEB", - "Y-MAR": "Y-MAR", - "Y-APR": "Y-APR", - "Y-MAY": "Y-MAY", - "Y-JUN": "Y-JUN", - "Y-JUL": "Y-JUL", - "Y-AUG": "Y-AUG", - "Y-SEP": "Y-SEP", - "Y-OCT": "Y-OCT", - "Y-NOV": "Y-NOV", - "Y-DEC": "Y-DEC", - "Q-JAN": "Q-JAN", - "Q-FEB": "Q-FEB", - "Q-MAR": "Q-MAR", - "Q-APR": "Q-APR", - "Q-MAY": "Q-MAY", - "Q-JUN": "Q-JUN", - "Q-JUL": "Q-JUL", - "Q-AUG": "Q-AUG", - "Q-SEP": "Q-SEP", - "Q-OCT": "Q-OCT", - "Q-NOV": "Q-NOV", - "Q-DEC": "Q-DEC", - "W-MON": "W-MON", - "W-TUE": "W-TUE", - "W-WED": "W-WED", - "W-THU": "W-THU", - "W-FRI": "W-FRI", - "W-SAT": "W-SAT", - "W-SUN": "W-SUN", -} - def to_gluonts_univariate(hf_dataset: datasets.Dataset): series_fields = [ @@ -188,8 +33,9 @@ def to_gluonts_univariate(hf_dataset: 
datasets.Dataset): ] series_fields.remove("timestamp") dataset_length = hf_dataset.info.splits["train"].num_examples * len(series_fields) - dataset_freq = pd.infer_freq(hf_dataset[0]["timestamp"]) - dataset_freq = offset_alias_to_period_alias.get(dataset_freq, dataset_freq) + + # Assumes that all time series in the dataset have the same frequency + dataset_freq = pd.DatetimeIndex(hf_dataset[0]["timestamp"]).to_period()[0].freqstr gts_dataset = [] for hf_entry in hf_dataset: