Chronos-2: Change default fine-tuning learning rate and remove experimental label (#381)

*Issue #, if available:*

*Description of changes:* Lower learning rates generally appear to be
working better. This is probably because we are doing full fine-tuning
of a model with 120M params.


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.
This commit is contained in:
Abdul Fatir 2025-11-18 10:33:38 +01:00 committed by GitHub
parent 0f3c5652e5
commit 359d7ff1b9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 75 additions and 79 deletions

File diff suppressed because one or more lines are too long

View file

@@ -100,7 +100,7 @@ class Chronos2Pipeline(BaseChronosPipeline):
| Sequence[Mapping[str, TensorOrArray | Mapping[str, TensorOrArray | None]]]
| None = None,
context_length: int | None = None,
- learning_rate: float = 1e-5,
+ learning_rate: float = 1e-6,
num_steps: int = 1000,
batch_size: int = 256,
output_dir: Path | str | None = None,
@@ -126,7 +126,7 @@ class Chronos2Pipeline(BaseChronosPipeline):
context_length
The maximum context length used during fine-tuning, by default set to the model's default context length
learning_rate
- The learning rate for the optimizer, by default 1e-5
+ The learning rate for the optimizer, by default 1e-6
num_steps
The number of steps to fine-tune for, by default 1000
batch_size
@@ -153,12 +153,6 @@ class Chronos2Pipeline(BaseChronosPipeline):
from chronos.chronos2.trainer import Chronos2Trainer, EvaluateAndSaveFinalStepCallback
- warnings.warn(
- "Fine-tuning support is experimental and may be changed in future versions.",
- category=FutureWarning,
- stacklevel=2,
- )
# Create a copy of the model to avoid modifying the original
config = deepcopy(self.model.config)
model = Chronos2Model(config).to(self.model.device) # type: ignore