mirror of
https://github.com/NVIDIA-NeMo/DataDesigner
synced 2026-05-24 09:48:29 +00:00
fix(interface): silence registry-default deprecation when library auto-fills it (#655)
The ``ModelProviderRegistry.default is deprecated`` warning added in #594 fires for every fresh-install ``DataDesigner()`` construction, even when the user wrote ``default=`` nowhere — neither in YAML, nor in Python, nor in any ``ModelConfig``. Root cause: ``resolve_model_provider_registry`` synthesises ``default=providers[0].name`` for the multi-provider case to satisfy ``check_implicit_default``. The auto-seeded ``~/.data-designer/model_providers.yaml`` ships three providers and no ``default:`` key, so this path is hit on every bare ``DataDesigner()`` call. ``_warn_on_explicit_default`` then attributes the warning to the user's ``DataDesigner()`` line, with a remediation message ("Specify provider= explicitly on each ModelConfig") that doesn't even apply when the user hasn't built a ``ModelConfig`` (e.g. a UUID-only sampler config with the GitHub plugin). Fix: broaden the existing warning suppression in ``DataDesigner.__init__`` to also cover the ``model_providers is None`` case. The user is opting into all defaults — the library is the one filling ``default=``, so the deprecation nudge is misdirected. Users who hand-construct a multi-provider list in Python still see the warning (they wrote the multi-provider intent themselves), and direct ``ModelProviderRegistry(default="x")`` always warns — those are the entry points #589 actually targets. New regression test pins the bare-``DataDesigner()`` quiet path so a future tightening of the suppression can't silently re-introduce the spurious warning. Refs #589, follow-up to #594. Signed-off-by: Nabin Mulepati <nmulepati@nvidia.com>
This commit is contained in:
parent
ef761b824b
commit
3c8394e783
2 changed files with 78 additions and 6 deletions
|
|
@ -166,13 +166,31 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|||
self._model_providers = self._resolve_model_providers(model_providers)
|
||||
default_provider_name = None
|
||||
self._mcp_providers = mcp_providers or []
|
||||
# When the YAML carries a default, ``get_default_provider_name`` already
|
||||
# nudged the user with a ``DeprecationWarning``. Building the registry
|
||||
# below would re-fire ``ModelProviderRegistry._warn_on_explicit_default``
|
||||
# for the same root cause, so suppress that second warning. See PR #594
|
||||
# review.
|
||||
# Suppress ``ModelProviderRegistry._warn_on_explicit_default`` whenever
|
||||
# *we* are filling ``default=`` on the user's behalf rather than the
|
||||
# user actively opting into the deprecated registry-level default. Two
|
||||
# such cases:
|
||||
# 1. ``model_providers is None`` — the caller passed nothing, so we
|
||||
# load the YAML's ``providers:`` list and (in the multi-provider
|
||||
# case) ``resolve_model_provider_registry`` synthesises
|
||||
# ``default=providers[0].name`` to satisfy ``check_implicit_default``.
|
||||
# The fresh-install YAML ships three providers and no ``default:``
|
||||
# key, so this fires for every default ``DataDesigner()``
|
||||
# construction. The user has no actionable lever here, and the
|
||||
# warning's "Specify provider= on each ModelConfig" remediation
|
||||
# doesn't apply when they haven't built a ``ModelConfig`` at all.
|
||||
# 2. ``default_provider_name is not None`` — the YAML carried a
|
||||
# ``default:`` key and ``get_default_provider_name`` already
|
||||
# emitted the YAML-level ``DeprecationWarning``. The registry
|
||||
# warning would fire for the same root cause, so suppress it to
|
||||
# avoid double-warning. See PR #594 review.
|
||||
# Users who hand-construct a multi-provider list in Python still see
|
||||
# the warning (they wrote the multi-provider intent themselves), and
|
||||
# users who hand-construct ``ModelProviderRegistry(default=...)``
|
||||
# directly always see it — those are the entry points #589 targets.
|
||||
library_synthesised_default = model_providers is None or default_provider_name is not None
|
||||
with warnings.catch_warnings():
|
||||
if default_provider_name is not None:
|
||||
if library_synthesised_default:
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
message="ModelProviderRegistry.default is deprecated",
|
||||
|
|
|
|||
|
|
@ -654,6 +654,60 @@ def test_init_yaml_default_emits_single_deprecation_warning(
|
|||
)
|
||||
|
||||
|
||||
def test_init_no_user_providers_no_yaml_default_stays_quiet(
|
||||
stub_artifact_path: Path,
|
||||
stub_managed_assets_path: Path,
|
||||
) -> None:
|
||||
"""Pin the bare-``DataDesigner()`` happy path: when the caller passes
|
||||
nothing and the YAML carries multiple ``providers:`` but no ``default:``
|
||||
key, ``resolve_model_provider_registry`` synthesises
|
||||
``default=providers[0].name`` to satisfy ``check_implicit_default``. The
|
||||
user did not opt into the deprecated registry-level default — the library
|
||||
filled it in on their behalf — so ``_warn_on_explicit_default`` must stay
|
||||
quiet. The fresh-install YAML ships exactly this shape (3 providers, no
|
||||
``default:``), so a regression here is what every user sees on their first
|
||||
``DataDesigner()`` call.
|
||||
|
||||
Counterpart to ``test_init_user_supplied_providers_preserve_first_wins_over_yaml_default``,
|
||||
which pins that the warning *does* fire when the caller hand-builds a
|
||||
multi-provider list themselves (they wrote the multi-provider intent, so
|
||||
the deprecation nudge applies).
|
||||
"""
|
||||
yaml_providers = [
|
||||
ModelProvider(
|
||||
name="yaml-first",
|
||||
endpoint="https://yaml-first.example.com/v1",
|
||||
api_key="yaml-first-key",
|
||||
),
|
||||
ModelProvider(
|
||||
name="yaml-second",
|
||||
endpoint="https://yaml-second.example.com/v1",
|
||||
api_key="yaml-second-key",
|
||||
),
|
||||
]
|
||||
|
||||
with warnings.catch_warnings(record=True) as caught:
|
||||
warnings.simplefilter("always", DeprecationWarning)
|
||||
with (
|
||||
patch.object(dd_mod, "get_default_providers", return_value=yaml_providers),
|
||||
patch.object(dd_mod, "get_default_provider_name", return_value=None),
|
||||
):
|
||||
data_designer = DataDesigner(
|
||||
artifact_path=stub_artifact_path,
|
||||
secret_resolver=PlaintextResolver(),
|
||||
managed_assets_path=stub_managed_assets_path,
|
||||
)
|
||||
|
||||
deprecation_messages = [str(w.message) for w in caught if issubclass(w.category, DeprecationWarning)]
|
||||
registry_default_warnings = [m for m in deprecation_messages if "ModelProviderRegistry.default is deprecated" in m]
|
||||
assert registry_default_warnings == [], (
|
||||
"Library-synthesised default must not emit the registry-level deprecation; "
|
||||
f"the user did not opt into it. Saw: {deprecation_messages}"
|
||||
)
|
||||
# Behavioral pin: first-wins still resolves correctly.
|
||||
assert data_designer.model_provider_registry.get_default_provider_name() == "yaml-first"
|
||||
|
||||
|
||||
def test_run_config_setting_persists(stub_artifact_path, stub_model_providers):
|
||||
"""Test that run config setting persists across multiple calls."""
|
||||
data_designer = DataDesigner(artifact_path=stub_artifact_path, model_providers=stub_model_providers)
|
||||
|
|
|
|||
Loading…
Reference in a new issue