fix(interface): silence registry-default deprecation when library auto-fills it (#655)

The ``ModelProviderRegistry.default is deprecated`` warning added in #594
fires for every fresh-install ``DataDesigner()`` construction, even when
the user wrote ``default=`` nowhere — neither in YAML, nor in Python, nor
in any ``ModelConfig``.

Root cause: ``resolve_model_provider_registry`` synthesises
``default=providers[0].name`` for the multi-provider case to satisfy
``check_implicit_default``. The auto-seeded
``~/.data-designer/model_providers.yaml`` ships three providers and no
``default:`` key, so this path is hit on every bare ``DataDesigner()``
call. ``_warn_on_explicit_default`` then attributes the warning to the
user's ``DataDesigner()`` line, with a remediation message ("Specify
provider= explicitly on each ModelConfig") that doesn't even apply when
the user hasn't built a ``ModelConfig`` (e.g. a UUID-only sampler config
with the GitHub plugin).

Fix: broaden the existing warning suppression in ``DataDesigner.__init__``
to also cover the ``model_providers is None`` case. The user is opting
into all defaults — the library is the one filling ``default=``, so the
deprecation nudge is misdirected. Users who hand-construct a
multi-provider list in Python still see the warning (they wrote the
multi-provider intent themselves), and direct
``ModelProviderRegistry(default="x")`` always warns — those are the
entry points #589 actually targets.

New regression test pins the bare-``DataDesigner()`` quiet path so a
future tightening of the suppression can't silently re-introduce the
spurious warning.

Refs #589, follow-up to #594.

Signed-off-by: Nabin Mulepati <nmulepati@nvidia.com>
This commit is contained in:
Nabin Mulepati 2026-05-13 14:15:10 -06:00 committed by GitHub
parent ef761b824b
commit 3c8394e783
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 78 additions and 6 deletions

View file

@ -166,13 +166,31 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
self._model_providers = self._resolve_model_providers(model_providers)
default_provider_name = None
self._mcp_providers = mcp_providers or []
# When the YAML carries a default, ``get_default_provider_name`` already
# nudged the user with a ``DeprecationWarning``. Building the registry
# below would re-fire ``ModelProviderRegistry._warn_on_explicit_default``
# for the same root cause, so suppress that second warning. See PR #594
# review.
# Suppress ``ModelProviderRegistry._warn_on_explicit_default`` whenever
# *we* are filling ``default=`` on the user's behalf rather than the
# user actively opting into the deprecated registry-level default. Two
# such cases:
# 1. ``model_providers is None`` — the caller passed nothing, so we
# load the YAML's ``providers:`` list and (in the multi-provider
# case) ``resolve_model_provider_registry`` synthesises
# ``default=providers[0].name`` to satisfy ``check_implicit_default``.
# The fresh-install YAML ships three providers and no ``default:``
# key, so this fires for every default ``DataDesigner()``
# construction. The user has no actionable lever here, and the
# warning's "Specify provider= on each ModelConfig" remediation
# doesn't apply when they haven't built a ``ModelConfig`` at all.
# 2. ``default_provider_name is not None`` — the YAML carried a
# ``default:`` key and ``get_default_provider_name`` already
# emitted the YAML-level ``DeprecationWarning``. The registry
# warning would fire for the same root cause, so suppress it to
# avoid double-warning. See PR #594 review.
# Users who hand-construct a multi-provider list in Python still see
# the warning (they wrote the multi-provider intent themselves), and
# users who hand-construct ``ModelProviderRegistry(default=...)``
# directly always see it — those are the entry points #589 targets.
library_synthesised_default = model_providers is None or default_provider_name is not None
with warnings.catch_warnings():
if default_provider_name is not None:
if library_synthesised_default:
warnings.filterwarnings(
"ignore",
message="ModelProviderRegistry.default is deprecated",

View file

@ -654,6 +654,60 @@ def test_init_yaml_default_emits_single_deprecation_warning(
)
def test_init_no_user_providers_no_yaml_default_stays_quiet(
stub_artifact_path: Path,
stub_managed_assets_path: Path,
) -> None:
"""Pin the bare-``DataDesigner()`` happy path: when the caller passes
nothing and the YAML carries multiple ``providers:`` but no ``default:``
key, ``resolve_model_provider_registry`` synthesises
``default=providers[0].name`` to satisfy ``check_implicit_default``. The
user did not opt into the deprecated registry-level default the library
filled it in on their behalf so ``_warn_on_explicit_default`` must stay
quiet. The fresh-install YAML ships exactly this shape (3 providers, no
``default:``), so a regression here is what every user sees on their first
``DataDesigner()`` call.
Counterpart to ``test_init_user_supplied_providers_preserve_first_wins_over_yaml_default``,
which pins that the warning *does* fire when the caller hand-builds a
multi-provider list themselves (they wrote the multi-provider intent, so
the deprecation nudge applies).
"""
yaml_providers = [
ModelProvider(
name="yaml-first",
endpoint="https://yaml-first.example.com/v1",
api_key="yaml-first-key",
),
ModelProvider(
name="yaml-second",
endpoint="https://yaml-second.example.com/v1",
api_key="yaml-second-key",
),
]
with warnings.catch_warnings(record=True) as caught:
warnings.simplefilter("always", DeprecationWarning)
with (
patch.object(dd_mod, "get_default_providers", return_value=yaml_providers),
patch.object(dd_mod, "get_default_provider_name", return_value=None),
):
data_designer = DataDesigner(
artifact_path=stub_artifact_path,
secret_resolver=PlaintextResolver(),
managed_assets_path=stub_managed_assets_path,
)
deprecation_messages = [str(w.message) for w in caught if issubclass(w.category, DeprecationWarning)]
registry_default_warnings = [m for m in deprecation_messages if "ModelProviderRegistry.default is deprecated" in m]
assert registry_default_warnings == [], (
"Library-synthesised default must not emit the registry-level deprecation; "
f"the user did not opt into it. Saw: {deprecation_messages}"
)
# Behavioral pin: first-wins still resolves correctly.
assert data_designer.model_provider_registry.get_default_provider_name() == "yaml-first"
def test_run_config_setting_persists(stub_artifact_path, stub_model_providers):
"""Test that run config setting persists across multiple calls."""
data_designer = DataDesigner(artifact_path=stub_artifact_path, model_providers=stub_model_providers)