Clean up evaluation script (#218)

*Issue #, if available:*

*Description of changes:* This PR cleans up the evaluation script by using
`DatetimeIndex.to_period()` instead of the ugly hand-maintained frequency
mapping. Not sure what I was doing before. 🫠


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.

Co-authored-by: Abdul Fatir Ansari <ansarnd@amazon.de>
This commit is contained in:
Abdul Fatir 2024-11-29 10:33:41 +01:00 committed by GitHub
parent ead8323d76
commit c887278706
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -24,161 +24,6 @@ from chronos import (
app = typer.Typer(pretty_exceptions_enable=False)
# Taken from pandas._libs.tslibs.dtypes.OFFSET_TO_PERIOD_FREQSTR
def _build_offset_alias_to_period_alias():
    """Assemble the lookup table from offset aliases to period aliases.

    The table is generated family by family instead of being spelled out
    entry by entry. Keys are inserted in the same order as the literal
    table this replaces, so iteration order is preserved.

    Returns:
        dict: maps an offset alias string (e.g. ``"QE-DEC"``) to the period
        alias string it is translated to (e.g. ``"Q-DEC"``).
    """
    calendar_months = (
        "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
        "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
    )
    # The "*E-<month>" families are listed DEC first, then JAN..NOV.
    dec_first_months = calendar_months[-1:] + calendar_months[:-1]
    weekdays = ("MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN")

    table = {
        "WEEKDAY": "D",
        "EOM": "M",
        "BME": "M",
        "SME": "M",
        "BQS": "Q",
        "QS": "Q",
        "BQE": "Q",
    }
    # Business quarter end: the anchor month is dropped.
    table.update({f"BQE-{month}": "Q" for month in dec_first_months})
    table["MS"] = "M"
    # Aliases that are spelled the same as offsets and as periods.
    table.update({unit: unit for unit in ("D", "B", "min", "s", "ms", "us", "ns", "h")})
    # Quarter end / year end: the anchor month is carried over.
    table["QE"] = "Q"
    table.update({f"QE-{month}": f"Q-{month}" for month in dec_first_months})
    table["YE"] = "Y"
    table.update({f"YE-{month}": f"Y-{month}" for month in dec_first_months})
    table.update({"W": "W", "ME": "M", "Y": "Y", "BYE": "Y"})
    # Business year end: the anchor month is dropped.
    table.update({f"BYE-{month}": "Y" for month in dec_first_months})
    table.update({"YS": "Y", "BYS": "Y"})
    # Start-anchored families: the anchor month is dropped.
    for prefix, period in (("QS", "Q"), ("BQS", "Q"), ("YS", "Y"), ("BYS", "Y")):
        table.update({f"{prefix}-{month}": period for month in calendar_months})
    # Anchored period aliases map to themselves.
    for period in ("Y", "Q"):
        table.update({f"{period}-{month}": f"{period}-{month}" for month in calendar_months})
    table.update({f"W-{day}": f"W-{day}" for day in weekdays})
    return table


offset_alias_to_period_alias = _build_offset_alias_to_period_alias()
def to_gluonts_univariate(hf_dataset: datasets.Dataset):
series_fields = [
@ -188,8 +33,9 @@ def to_gluonts_univariate(hf_dataset: datasets.Dataset):
]
series_fields.remove("timestamp")
dataset_length = hf_dataset.info.splits["train"].num_examples * len(series_fields)
dataset_freq = pd.infer_freq(hf_dataset[0]["timestamp"])
dataset_freq = offset_alias_to_period_alias.get(dataset_freq, dataset_freq)
# Assumes that all time series in the dataset have the same frequency
dataset_freq = pd.DatetimeIndex(hf_dataset[0]["timestamp"]).to_period()[0].freqstr
gts_dataset = []
for hf_entry in hf_dataset: