Handle prediction_length below 3 (#407)

*Issue #, if available:* Fixes #403

*Description of changes:*
- Update the `future_df` validation logic to only check that
`prediction_length` values are provided for each item.
- Update unit tests for DF-based methods in `test_chronos2.py`
- Ignore fine-tuned checkpoint folders with `.gitignore`


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.
This commit is contained in:
Oleksandr Shchur 2025-12-01 13:10:16 +01:00 committed by GitHub
parent 8c686cfa71
commit 4eea8d0122
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 65 additions and 86 deletions

4
.gitignore vendored
View file

@ -160,4 +160,6 @@ cython_debug/
#.idea/
# macOS stuff
.DS_store
.DS_store
chronos-2-finetuned

View file

@ -185,25 +185,13 @@ def validate_df_inputs(
if context_ids != future_ids:
raise ValueError("future_df must contain the same time series IDs as df")
future_series_lengths = future_df[id_column].value_counts(sort=False).to_list()
# Validate future series lengths match prediction_length
future_start_idx = 0
future_timestamps_index = pd.DatetimeIndex(future_df[timestamp_column])
for future_length in future_series_lengths:
future_timestamps = future_timestamps_index[future_start_idx : future_start_idx + future_length]
future_series_id = future_df[id_column].iloc[future_start_idx]
if future_length != prediction_length:
raise ValueError(
f"Future covariates all time series must have length {prediction_length}, got {future_length} for series {future_series_id}"
)
if future_length < 3 or inferred_freq != validate_freq(future_timestamps, future_series_id):
raise ValueError(
f"Future covariates must have the same frequency as context, found series {future_series_id} with a different frequency"
)
future_start_idx += future_length
assert len(series_lengths) == len(future_series_lengths)
future_series_lengths = future_df[id_column].value_counts(sort=False)
if (future_series_lengths != prediction_length).any():
invalid_series = future_series_lengths[future_series_lengths != prediction_length]
raise ValueError(
f"future_df must contain {prediction_length=} values for each series, "
f"but found series with different lengths: {invalid_series.to_dict()}"
)
return df, future_df, inferred_freq, series_lengths, original_order
@ -303,10 +291,16 @@ def convert_df_input_to_list_of_dicts_input(
past_covariates_dict = {
col: df[col].to_numpy() for col in df.columns if col not in [id_column, timestamp_column] + target_columns
}
future_covariates_dict = {}
if future_df is not None:
future_covariates_dict = {
col: future_df[col].to_numpy() for col in future_df.columns if col not in [id_column, timestamp_column]
}
for col in future_df.columns.drop([id_column, timestamp_column]):
future_covariates_dict[col] = future_df[col].to_numpy()
if validate_inputs:
if (pd.DatetimeIndex(future_df[timestamp_column]) != pd.DatetimeIndex(prediction_timestamps_array)).any():
raise ValueError(
"future_df timestamps do not match the expected prediction timestamps. "
"You can disable this check by setting `validate_inputs=False`"
)
for i in range(len(series_lengths)):
start_idx, end_idx = indptr[i], indptr[i + 1]
@ -316,23 +310,12 @@ def convert_df_input_to_list_of_dicts_input(
prediction_timestamps[series_id] = prediction_timestamps_array[future_start_idx:future_end_idx]
task: dict[str, np.ndarray | dict[str, np.ndarray]] = {"target": target_array[:, start_idx:end_idx]}
# Handle covariates if present
if len(past_covariates_dict) > 0:
task["past_covariates"] = {col: values[start_idx:end_idx] for col, values in past_covariates_dict.items()}
# Handle future covariates
if future_df is not None:
first_future_timestamp = future_df[timestamp_column].iloc[future_start_idx]
assert first_future_timestamp == prediction_timestamps[series_id][0], (
f"the first timestamp in future_df must be the first forecast timestamp, found mismatch "
f"({first_future_timestamp} != {prediction_timestamps[series_id][0]}) in series {series_id}"
)
if len(future_covariates_dict) > 0:
task["future_covariates"] = {
col: values[future_start_idx:future_end_idx] for col, values in future_covariates_dict.items()
}
if len(future_covariates_dict) > 0:
task["future_covariates"] = {
col: values[future_start_idx:future_end_idx] for col, values in future_covariates_dict.items()
}
inputs.append(task)
assert len(inputs) == len(series_lengths)

View file

@ -421,43 +421,39 @@ def test_pipeline_can_evaluate_on_dummy_fev_task(pipeline, task_kwargs):
@pytest.mark.parametrize(
"context_setup, future_setup, expected_rows",
"context_setup, future_setup",
[
# Targets only
({}, None, 6), # 2 series * 3 predictions
({}, None),
# Multiple targets with different context lengths
(
{"target_cols": ["sales", "revenue", "profit"], "n_points": [10, 17]},
None,
18,
), # 2 series * 3 targets * 3 predictions
({"target_cols": ["sales", "revenue", "profit"], "n_points": [10, 17]}, None),
# With past covariates
({"covariates": ["cov1"]}, None, 6),
({"covariates": ["cov1"]}, None),
# With future covariates
({"covariates": ["cov1"]}, {"covariates": ["cov1"], "n_points": [3, 3]}, 6),
({"covariates": ["cov1"]}, {"covariates": ["cov1"]}),
# With past-only and future covariates
({"covariates": ["cov1", "cov2"]}, {"covariates": ["cov1"], "n_points": [3, 3]}, 6),
({"covariates": ["cov1", "cov2"]}, {"covariates": ["cov1"]}),
# With past-only and future covariates and different series order
(
{"series_ids": ["B", "C", "A", "Z"], "n_points": [10, 20, 100, 256], "covariates": ["cov1", "cov2"]},
{
"series_ids": ["B", "C", "A", "Z"],
"covariates": ["cov1"],
"n_points": [3, 3, 3, 3],
},
12,
{"series_ids": ["B", "C", "A", "Z"], "covariates": ["cov1"]},
),
],
)
@pytest.mark.parametrize("freq", ["s", "min", "30min", "h", "D", "W", "ME", "QE", "YE"])
@pytest.mark.parametrize("prediction_length", [1, 4])
@pytest.mark.parametrize("validate_inputs", [True, False])
def test_predict_df_works_for_valid_inputs(
pipeline, context_setup, future_setup, expected_rows, freq, validate_inputs
pipeline, context_setup, future_setup, freq, validate_inputs, prediction_length
):
prediction_length = 3
df = create_df(**context_setup, freq=freq)
forecast_start_times = get_forecast_start_times(df, freq)
future_df = create_future_df(forecast_start_times, **future_setup, freq=freq) if future_setup else None
if future_setup:
series_ids = future_setup.get("series_ids", ["A", "B"])
future_setup_with_n_points = {**future_setup, "n_points": [prediction_length] * len(series_ids)}
future_df = create_future_df(forecast_start_times, **future_setup_with_n_points, freq=freq)
else:
future_df = None
series_ids = context_setup.get("series_ids", ["A", "B"])
target_columns = context_setup.get("target_cols", ["target"])
@ -471,6 +467,7 @@ def test_predict_df_works_for_valid_inputs(
validate_inputs=validate_inputs,
)
expected_rows = n_series * n_targets * prediction_length
assert len(result) == expected_rows
assert "item_id" in result.columns and np.all(
result["item_id"].to_numpy() == np.array(series_ids).repeat(n_targets * prediction_length)
@ -580,23 +577,23 @@ def test_predict_df_with_future_df_missing_series_raises_error(pipeline):
pipeline.predict_df(df, future_df=future_df)
def test_predict_df_with_future_df_with_different_lengths_raises_error(pipeline):
df = create_df(series_ids=["A", "B"], covariates=["cov1"])
future_df = create_future_df(
get_forecast_start_times(df), series_ids=["A", "B"], n_points=[3, 7], covariates=["cov1"]
)
with pytest.raises(ValueError, match="all time series must have length"):
pipeline.predict_df(df, future_df=future_df, prediction_length=3)
def test_predict_df_with_future_df_with_different_freq_raises_error(pipeline):
df = create_df(series_ids=["A", "B"], covariates=["cov1"], freq="h")
future_df = create_future_df(
get_forecast_start_times(df), series_ids=["A", "B"], n_points=[3, 3], covariates=["cov1"], freq="D"
)
with pytest.raises(ValueError, match="must have the same frequency as context"):
with pytest.raises(ValueError, match="future_df timestamps do not match"):
pipeline.predict_df(df, future_df=future_df, prediction_length=3)
def test_predict_df_with_future_df_with_different_lengths_raises_error(pipeline):
df = create_df(series_ids=["A", "B"], covariates=["cov1"])
future_df = create_future_df(
get_forecast_start_times(df), series_ids=["A", "B"], n_points=[3, 7], covariates=["cov1"]
)
with pytest.raises(ValueError, match="future_df must contain prediction"):
pipeline.predict_df(df, future_df=future_df, prediction_length=3)
@ -874,40 +871,36 @@ def test_when_input_time_series_are_too_short_then_finetuning_raises_error(pipel
@pytest.mark.parametrize(
"context_setup, future_setup, expected_rows",
"context_setup, future_setup",
[
# Targets only
({}, None, 6), # 2 series * 3 predictions
({}, None),
# Multiple targets with different context lengths
(
{"target_cols": ["sales", "revenue", "profit"], "n_points": [10, 17]},
None,
18,
), # 2 series * 3 targets * 3 predictions
({"target_cols": ["sales", "revenue", "profit"], "n_points": [10, 17]}, None),
# With past covariates
({"covariates": ["cov1"]}, None, 6),
({"covariates": ["cov1"]}, None),
# With future covariates
({"covariates": ["cov1"]}, {"covariates": ["cov1"], "n_points": [3, 3]}, 6),
({"covariates": ["cov1"]}, {"covariates": ["cov1"]}),
# With past-only and future covariates
({"covariates": ["cov1", "cov2"]}, {"covariates": ["cov1"], "n_points": [3, 3]}, 6),
({"covariates": ["cov1", "cov2"]}, {"covariates": ["cov1"]}),
# With past-only and future covariates and different series order
(
{"series_ids": ["B", "C", "A", "Z"], "n_points": [10, 20, 100, 256], "covariates": ["cov1", "cov2"]},
{
"series_ids": ["B", "C", "A", "Z"],
"covariates": ["cov1"],
"n_points": [3, 3, 3, 3],
},
12,
{"series_ids": ["B", "C", "A", "Z"], "covariates": ["cov1"]},
),
],
)
@pytest.mark.parametrize("freq", ["h", "D", "ME"])
def test_two_step_finetuning_with_df_input_works(pipeline, context_setup, future_setup, expected_rows, freq):
def test_two_step_finetuning_with_df_input_works(pipeline, context_setup, future_setup, freq):
prediction_length = 3
df = create_df(**context_setup, freq=freq)
forecast_start_times = get_forecast_start_times(df, freq)
future_df = create_future_df(forecast_start_times, **future_setup, freq=freq) if future_setup else None
if future_setup:
series_ids = future_setup.get("series_ids", ["A", "B"])
future_setup_with_n_points = {**future_setup, "n_points": [prediction_length] * len(series_ids)}
future_df = create_future_df(forecast_start_times, **future_setup_with_n_points, freq=freq)
else:
future_df = None
series_ids = context_setup.get("series_ids", ["A", "B"])
target_columns = context_setup.get("target_cols", ["target"])
@ -940,6 +933,7 @@ def test_two_step_finetuning_with_df_input_works(pipeline, context_setup, future
)
# Check predictions from the fine-tuned model are valid
expected_rows = n_series * n_targets * prediction_length
assert len(result) == expected_rows
assert "item_id" in result.columns and np.all(
result["item_id"].to_numpy() == np.array(series_ids).repeat(n_targets * prediction_length)