mirror of
https://github.com/amazon-science/chronos-forecasting
synced 2026-05-23 17:48:23 +00:00
Fix issue with new caching mechanism in transformers and bump versions (#313)
*Issue #, if available:* Fixes #310 and closes #302 *Description of changes:* This PR fixes an issue related to the new caching mechanism for T5 introduced in `transformers==4.54`. [Prior versions set](https://github.com/huggingface/transformers/blob/v4.53.3/src/transformers/models/t5/modeling_t5.py#L1328) `encoder_config.is_encoder_decoder = False` when initializing the encoder and decoder. Following transformers, we also initialized Chronos-Bolt in the same way. However, in v4.54 this line [has been removed](https://github.com/huggingface/transformers/blob/3fd456b200/src/transformers/models/t5/modeling_t5.py#L1301) and [new logic has been added](https://github.com/huggingface/transformers/blob/3fd456b200/src/transformers/models/t5/modeling_t5.py#L494), which relies on `is_encoder_decoder` [being True](https://github.com/huggingface/transformers/blob/3fd456b200/src/transformers/models/t5/modeling_t5.py#L1007). This causes Chronos-Bolt to break as described in #310. This PR removes `is_encoder_decoder = False` for both the encoder and the decoder, which fixes the issue. I re-ran our mini eval in the CI and got the same results for v4.54 and v4.48 (our current lower bound). This PR also bumps package versions. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
This commit is contained in:
parent
6a9c8dadac
commit
fcd09fe8b6
3 changed files with 42 additions and 24 deletions
|
|
@ -1,25 +1,21 @@
|
|||
[project]
|
||||
name = "chronos-forecasting"
|
||||
version = "1.5.2"
|
||||
version = "1.5.3"
|
||||
authors = [
|
||||
{ name="Abdul Fatir Ansari", email="ansarnd@amazon.com" },
|
||||
{ name="Lorenzo Stella", email="stellalo@amazon.com" },
|
||||
{ name="Caner Turkmen", email="atturkm@amazon.com" },
|
||||
{ name = "Abdul Fatir Ansari", email = "ansarnd@amazon.com" },
|
||||
{ name = "Lorenzo Stella", email = "stellalo@amazon.com" },
|
||||
{ name = "Caner Turkmen", email = "atturkm@amazon.com" },
|
||||
]
|
||||
description = "Chronos: Pretrained models for time series forecasting"
|
||||
readme = "README.md"
|
||||
license = { file = "LICENSE" }
|
||||
requires-python = ">=3.9"
|
||||
dependencies = [
|
||||
"torch>=2.0,<3", # package was tested on 2.2
|
||||
"transformers>=4.48,<5",
|
||||
"accelerate>=0.32,<2",
|
||||
]
|
||||
dependencies = ["torch>=2.0,<3", "transformers>=4.48,<5", "accelerate>=0.32,<2"]
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Operating System :: OS Independent",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Operating System :: OS Independent",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
|
|
@ -30,10 +26,24 @@ build-backend = "hatchling.build"
|
|||
packages = ["src/chronos"]
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = ["pytest~=8.0", "numpy~=1.21"]
|
||||
test = ["pytest~=8.0", "numpy>=1.21,<3"]
|
||||
typecheck = ["mypy~=1.9"]
|
||||
training = ["gluonts[pro]~=0.15", "numpy~=1.21", "datasets~=2.18", "typer", "typer-config", "joblib", "scikit-learn", "tensorboard"]
|
||||
evaluation = ["gluonts[pro]~=0.15", "numpy~=1.21", "datasets~=2.18", "typer"]
|
||||
training = [
|
||||
"gluonts[pro]~=0.15",
|
||||
"numpy>=1.21,<3",
|
||||
"datasets>=2.18,<4",
|
||||
"typer",
|
||||
"typer-config",
|
||||
"joblib",
|
||||
"scikit-learn",
|
||||
"tensorboard",
|
||||
]
|
||||
evaluation = [
|
||||
"gluonts[pro]~=0.15",
|
||||
"numpy>=1.21,<3",
|
||||
"datasets>=2.18,<4",
|
||||
"typer",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/amazon-science/chronos-forecasting"
|
||||
|
|
@ -42,3 +52,11 @@ Paper = "https://arxiv.org/abs/2403.07815"
|
|||
|
||||
[tool.mypy]
|
||||
ignore_missing_imports = true
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 88
|
||||
lint.ignore = [
|
||||
"E501", # Line too long
|
||||
"E731", # Do not assign a `lambda` expression, use a `def`
|
||||
"E722", # Do not use bare `except`
|
||||
]
|
||||
|
|
|
|||
|
|
@ -302,10 +302,12 @@ class ChronosModel(nn.Module):
|
|||
A tensor of encoder embeddings with shape
|
||||
(batch_size, sequence_length, d_model).
|
||||
"""
|
||||
assert (
|
||||
self.config.model_type == "seq2seq"
|
||||
), "Encoder embeddings are only supported for encoder-decoder models"
|
||||
assert hasattr(self.model, "encoder")
|
||||
assert self.config.model_type == "seq2seq", (
|
||||
"Encoder embeddings are only supported for encoder-decoder models"
|
||||
)
|
||||
assert hasattr(self.model, "encoder") and isinstance(
|
||||
self.model.encoder, nn.Module
|
||||
)
|
||||
|
||||
return self.model.encoder(
|
||||
input_ids=input_ids, attention_mask=attention_mask
|
||||
|
|
@ -346,7 +348,7 @@ class ChronosModel(nn.Module):
|
|||
if top_p is None:
|
||||
top_p = self.config.top_p
|
||||
|
||||
assert hasattr(self.model, "generate")
|
||||
assert callable(getattr(self.model, "generate", None))
|
||||
|
||||
preds = self.model.generate(
|
||||
input_ids=input_ids,
|
||||
|
|
@ -362,7 +364,7 @@ class ChronosModel(nn.Module):
|
|||
top_k=top_k,
|
||||
top_p=top_p,
|
||||
),
|
||||
)
|
||||
) # type: ignore
|
||||
|
||||
if self.config.model_type == "seq2seq":
|
||||
preds = preds[..., 1:] # remove the decoder start token
|
||||
|
|
|
|||
|
|
@ -179,7 +179,6 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
|
|||
encoder_config = copy.deepcopy(config)
|
||||
encoder_config.is_decoder = False
|
||||
encoder_config.use_cache = False
|
||||
encoder_config.is_encoder_decoder = False
|
||||
self.encoder = T5Stack(encoder_config, self.shared)
|
||||
|
||||
self._init_decoder(config)
|
||||
|
|
@ -381,7 +380,6 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
|
|||
def _init_decoder(self, config):
|
||||
decoder_config = copy.deepcopy(config)
|
||||
decoder_config.is_decoder = True
|
||||
decoder_config.is_encoder_decoder = False
|
||||
decoder_config.num_layers = config.num_decoder_layers
|
||||
self.decoder = T5Stack(decoder_config, self.shared)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue