mirror of
https://github.com/NVIDIA-NeMo/DataDesigner
synced 2026-05-24 09:48:29 +00:00
fix(models): make deprecation warnings visible under default filters
andreatgretel (PR #594): the YAML-default warning in `get_default_provider_name` and the registry-default warning emitted from inside DataDesigner helpers were attributing to data_designer library frames, not user code. Python's default filter chain includes `ignore::DeprecationWarning`, so library-attributed entries are silenced — meaning a normal `DataDesigner()` call with a YAML `default:` set showed nothing, and `resolve_model_provider_registry` warnings were similarly invisible. Two related changes: 1. `warn_at_caller`: extend the default skip-list from `("pydantic",)` to `("pydantic", "pydantic_core", "data_designer")` so the walk escapes both pydantic's validator-dispatch frames and data_designer helper frames before attributing. Also tighten the prefix predicate to exact-or-dotted-prefix matching (`name == p or name.startswith(p + ".")`) so e.g. `pydantic_helpers` is not falsely matched as part of `pydantic` (johnnygreco nit). Allow callers to pass a custom `skip_prefixes` for flexibility. Drop the "skip frame 0+1 unconditionally" guard now that prefix matching covers it. 2. `get_default_provider_name`: switch from `warnings.warn(stacklevel=2)` to `warn_at_caller`. The previous stacklevel pointed into `default_model_settings.py`, which is a library file → silenced under default filters. Verified the fix empirically with `python -W default`: warning is now attributed to the user's call site and rendered. johnnygreco (PR #594): add the missing `test_explicit_default_none_does_not_emit_deprecation_warning` regression for the `self.default is not None` predicate landed in the prior round. Tests: - New `test_warning_helpers.py` pins prefix-matching precision (rejects `pydantic_helpers` / `data_designer_other`), default skip-list contents, attribution past skip-prefix frames, and per-call-site dedup behavior. - `test_get_default_provider_name_warning_attributes_to_user_frame` pins andreatgretel's repro for the YAML-default site. - `test_explicit_default_warning_attributes_to_user_frame` pins the multi-frame case: construction goes through `resolve_model_provider_registry`, so the walk has to escape both pydantic and data_designer before landing on the test file. - `test_explicit_default_none_does_not_emit_deprecation_warning` pins johnnygreco's predicate-tightening regression. 3,124 tests pass (540 config + 1,923 engine + 653 interface; +10 net from this round). Refs #589 Made-with: Cursor
This commit is contained in:
parent
481643f856
commit
247fa3000d
5 changed files with 229 additions and 27 deletions
|
|
@ -5,7 +5,6 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
import os
|
||||
import warnings
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
|
@ -25,6 +24,7 @@ from data_designer.config.utils.constants import (
|
|||
PREDEFINED_PROVIDERS_MODEL_MAP,
|
||||
)
|
||||
from data_designer.config.utils.io_helpers import load_config_file, save_config_file
|
||||
from data_designer.config.utils.warning_helpers import warn_at_caller
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -104,12 +104,18 @@ def get_default_provider_name() -> str | None:
|
|||
"""
|
||||
default = _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH).get("default")
|
||||
if default is not None:
|
||||
warnings.warn(
|
||||
# ``warn_at_caller`` (rather than ``warnings.warn(stacklevel=2)``) so the
|
||||
# warning attributes to the user's call site rather than this library
|
||||
# module. The only real call path is ``DataDesigner.__init__``, which
|
||||
# is itself a ``data_designer`` frame; under default Python filters,
|
||||
# library-attributed ``DeprecationWarning`` entries are silenced
|
||||
# (``ignore::DeprecationWarning``), so library attribution = invisible
|
||||
# warning. See PR #594 review.
|
||||
warn_at_caller(
|
||||
f"The 'default:' key in {MODEL_PROVIDERS_FILE_PATH} is deprecated and will "
|
||||
"be removed in a future release. Remove it and specify provider= explicitly "
|
||||
"on each ModelConfig instead. See issue #589.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return default
|
||||
|
||||
|
|
|
|||
|
|
@ -3,22 +3,30 @@
|
|||
|
||||
"""Helpers for emitting warnings that attribute correctly to user code.
|
||||
|
||||
Pydantic v2 dispatches ``@model_validator`` / ``@field_validator`` callbacks
|
||||
through several internal frames. ``warnings.warn(stacklevel=N)`` from inside a
|
||||
validator therefore tends to land inside pydantic's machinery rather than at
|
||||
the user's ``ModelConfig(...)`` / ``ModelProviderRegistry(...)`` call site.
|
||||
Library-internal warnings (typically emitted from a pydantic ``@model_validator``
|
||||
or from a helper function) need to be attributed to the *user's* call site, not
|
||||
to the library frame that happened to fire them. Two reasons:
|
||||
|
||||
That breaks two things:
|
||||
1. Attribution — a source location pointing at library internals isn't
|
||||
actionable.
|
||||
2. Visibility under default filters — Python's default ``DeprecationWarning``
|
||||
filter is::
|
||||
|
||||
1. Attribution — the displayed source location is unhelpful.
|
||||
2. Deduplication — Python's default once-per-location dedup key is
|
||||
default::DeprecationWarning:__main__
|
||||
ignore::DeprecationWarning
|
||||
|
||||
Library-attributed ``DeprecationWarning`` entries fall under the second
|
||||
filter and are silenced. Attributing to user code is what gets the warning
|
||||
actually shown.
|
||||
|
||||
3. Deduplication — Python's once-per-location dedup key is
|
||||
``(category, module, lineno)``. When every call resolves to the same
|
||||
pydantic-internal line, every warning after the first is silently
|
||||
suppressed regardless of which user file triggered it.
|
||||
library-internal line, every warning after the first is silently suppressed
|
||||
regardless of which user file triggered it.
|
||||
|
||||
``warn_at_caller`` walks the stack to the first frame outside pydantic (and
|
||||
outside this helper / the calling validator) and uses
|
||||
``warnings.warn_explicit`` to attribute the warning there.
|
||||
``warn_at_caller`` walks the stack past frames whose module belongs to a known
|
||||
internal package (pydantic, data_designer) and uses ``warnings.warn_explicit``
|
||||
to attribute the warning at the first user frame.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -26,19 +34,50 @@ from __future__ import annotations
|
|||
import sys
|
||||
import warnings
|
||||
|
||||
DEFAULT_INTERNAL_PREFIXES: tuple[str, ...] = ("pydantic", "pydantic_core", "data_designer")
|
||||
"""Modules whose frames are skipped when walking up to the user's call site.
|
||||
|
||||
def warn_at_caller(message: str, category: type[Warning]) -> None:
|
||||
"""Emit ``message`` attributed to the first non-pydantic frame above the caller.
|
||||
Matching is exact-or-dotted-prefix (see ``_module_in_prefixes``), so
|
||||
``pydantic_helpers`` is *not* treated as part of ``pydantic``."""
|
||||
|
||||
Intended to be invoked from inside a pydantic validator. The walk skips this
|
||||
helper's own frame and the calling validator's frame, then walks past any
|
||||
pydantic-internal frames until it finds the user's call site.
|
||||
|
||||
def _module_in_prefixes(module_name: str, prefixes: tuple[str, ...]) -> bool:
|
||||
"""Return True if ``module_name`` belongs to one of the prefix-rooted packages.
|
||||
|
||||
Uses exact-equality plus dotted-prefix matching so that, e.g.,
|
||||
``pydantic_helpers`` is NOT treated as part of the ``pydantic`` package
|
||||
while ``pydantic.fields`` is. Same for ``data_designer`` vs. a hypothetical
|
||||
``data_designer_other``.
|
||||
"""
|
||||
return any(module_name == prefix or module_name.startswith(prefix + ".") for prefix in prefixes)
|
||||
|
||||
|
||||
def warn_at_caller(
|
||||
message: str,
|
||||
category: type[Warning],
|
||||
*,
|
||||
skip_prefixes: tuple[str, ...] = DEFAULT_INTERNAL_PREFIXES,
|
||||
) -> None:
|
||||
"""Emit ``message`` attributed to the first frame outside ``skip_prefixes``.
|
||||
|
||||
Intended for warnings whose root cause is the user's call site but whose
|
||||
emission point is library code (a pydantic validator, an internal helper,
|
||||
etc.). The walk starts above this helper's own frame and skips every frame
|
||||
whose module belongs to a package in ``skip_prefixes`` until it reaches a
|
||||
user frame.
|
||||
|
||||
The default skip set covers:
|
||||
|
||||
* ``pydantic`` / ``pydantic_core`` — so warnings emitted from
|
||||
``@model_validator`` callbacks escape pydantic's dispatch frames.
|
||||
* ``data_designer`` — so warnings emitted from a registry / model-config
|
||||
built deep inside a DataDesigner helper still attribute to the outermost
|
||||
user call. Without this, attribution lands on a library file and Python's
|
||||
default ``DeprecationWarning`` filter silences the warning entirely.
|
||||
|
||||
The user frame's ``__warningregistry__`` is passed to
|
||||
``warnings.warn_explicit`` so Python's built-in once-per-location dedup keys
|
||||
on the *user's* (filename, lineno) rather than an internal pydantic frame.
|
||||
That matches how ``warnings.warn`` would dedup if ``stacklevel`` could
|
||||
correctly point at the user.
|
||||
on the *user's* (filename, lineno) rather than an internal frame.
|
||||
|
||||
We deliberately do *not* pass ``module_globals`` — it's only used for
|
||||
``linecache`` source-line display, and for scripts run with ``python -c``
|
||||
|
|
@ -47,11 +86,10 @@ def warn_at_caller(message: str, category: type[Warning]) -> None:
|
|||
module")``. Skipping ``module_globals`` keeps the warning path robust at
|
||||
the cost of an empty source line in the formatted output.
|
||||
"""
|
||||
# Skip frame 0 (warn_at_caller) and frame 1 (the validator that called us).
|
||||
frame = sys._getframe(2) if hasattr(sys, "_getframe") else None
|
||||
frame = sys._getframe(1) if hasattr(sys, "_getframe") else None
|
||||
while frame is not None:
|
||||
module_name = frame.f_globals.get("__name__", "")
|
||||
if not module_name.startswith("pydantic"):
|
||||
if not _module_in_prefixes(module_name, skip_prefixes):
|
||||
warnings.warn_explicit(
|
||||
message,
|
||||
category,
|
||||
|
|
@ -63,5 +101,5 @@ def warn_at_caller(message: str, category: type[Warning]) -> None:
|
|||
return
|
||||
frame = frame.f_back
|
||||
|
||||
# Fallback: never escaped pydantic frames (or no frame access). Use stacklevel.
|
||||
# Fallback: never escaped library frames (or no frame access). Use stacklevel.
|
||||
warnings.warn(message, category, stacklevel=3)
|
||||
|
|
|
|||
|
|
@ -167,6 +167,32 @@ def test_get_default_provider_name_without_default_key(tmp_path: Path):
|
|||
assert get_default_provider_name() is None
|
||||
|
||||
|
||||
def test_get_default_provider_name_warning_attributes_to_user_frame(tmp_path: Path):
|
||||
"""Regression for PR #594 review (andreatgretel): the YAML-default warning
|
||||
must attribute to the user's call site, not to ``default_model_settings.py``.
|
||||
Python's default filter ignores library-attributed ``DeprecationWarning``
|
||||
entries, so the previous ``stacklevel=2`` attribution rendered the warning
|
||||
invisible under default filters on the only real call path
|
||||
(``DataDesigner.__init__``). See issue #589.
|
||||
"""
|
||||
providers_file_path = tmp_path / "providers.yaml"
|
||||
providers_file_path.write_text(
|
||||
json.dumps(dict(providers=[p.model_dump() for p in get_builtin_model_providers()], default="nvidia"))
|
||||
)
|
||||
with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path):
|
||||
with warnings.catch_warnings(record=True) as caught:
|
||||
warnings.simplefilter("always", DeprecationWarning)
|
||||
assert get_default_provider_name() == "nvidia"
|
||||
|
||||
matches = [w for w in caught if "'default:' key" in str(w.message)]
|
||||
assert len(matches) == 1, [str(w.message) for w in caught]
|
||||
assert matches[0].filename == __file__, (
|
||||
f"Warning attributed to {matches[0].filename!r} (line {matches[0].lineno}) "
|
||||
f"instead of the test file. Library-attributed DeprecationWarnings are "
|
||||
f"silenced under default filters."
|
||||
)
|
||||
|
||||
|
||||
def test_get_default_provider_name_path_does_not_exist():
|
||||
with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=Path("non_existent_path")):
|
||||
with pytest.raises(FileNotFoundError, match=r"Default model providers file not found at 'non_existent_path'"):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,95 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
|
||||
from data_designer.config.utils.warning_helpers import _module_in_prefixes, warn_at_caller
|
||||
|
||||
|
||||
def test_module_in_prefixes_exact_match():
|
||||
assert _module_in_prefixes("pydantic", ("pydantic",)) is True
|
||||
|
||||
|
||||
def test_module_in_prefixes_dotted_submodule():
|
||||
assert _module_in_prefixes("pydantic.fields", ("pydantic",)) is True
|
||||
assert _module_in_prefixes("data_designer.config.models", ("data_designer",)) is True
|
||||
|
||||
|
||||
def test_module_in_prefixes_rejects_prefix_collision():
|
||||
"""Regression for PR #594 review (johnnygreco): ``startswith`` matching
|
||||
naively on the prefix would silently treat ``pydantic_helpers`` as part of
|
||||
the ``pydantic`` package. Anchor on exact-or-dotted-prefix instead.
|
||||
"""
|
||||
assert _module_in_prefixes("pydantic_helpers", ("pydantic",)) is False
|
||||
assert _module_in_prefixes("pydanticfoo", ("pydantic",)) is False
|
||||
assert _module_in_prefixes("data_designer_other", ("data_designer",)) is False
|
||||
|
||||
|
||||
def test_warn_at_caller_attributes_to_direct_caller():
|
||||
"""When called from a non-skipped module, the warning attributes to the
|
||||
caller's frame.
|
||||
"""
|
||||
with warnings.catch_warnings(record=True) as caught:
|
||||
warnings.simplefilter("always")
|
||||
warn_at_caller("hello", DeprecationWarning) # line anchored below
|
||||
|
||||
assert len(caught) == 1
|
||||
assert caught[0].filename == __file__
|
||||
assert "hello" in str(caught[0].message)
|
||||
|
||||
|
||||
def test_warn_at_caller_skips_skip_prefix_frames():
|
||||
"""The walk should escape any frame whose module is listed in
|
||||
``skip_prefixes`` and attribute to the first frame outside them. We
|
||||
simulate a library frame by ``exec``-ing a helper with a fake module name
|
||||
in its globals; calling that helper produces a frame whose
|
||||
``f_globals["__name__"]`` is the fake name, mirroring how a real library
|
||||
frame would appear during the walk.
|
||||
"""
|
||||
library_globals: dict[str, object] = {
|
||||
"__name__": "fake_library.dispatch",
|
||||
"warn_at_caller": warn_at_caller,
|
||||
"DeprecationWarning": DeprecationWarning,
|
||||
}
|
||||
exec(
|
||||
"def emit():\n warn_at_caller('from-library', DeprecationWarning, skip_prefixes=('fake_library',))\n",
|
||||
library_globals,
|
||||
)
|
||||
emit = library_globals["emit"]
|
||||
|
||||
with warnings.catch_warnings(record=True) as caught:
|
||||
warnings.simplefilter("always")
|
||||
emit()
|
||||
|
||||
assert len(caught) == 1
|
||||
assert caught[0].filename == __file__, f"Expected attribution at {__file__!r}, got {caught[0].filename!r}"
|
||||
|
||||
|
||||
def test_warn_at_caller_default_skips_pydantic_and_data_designer():
|
||||
"""Default ``skip_prefixes`` should cover both pydantic and data_designer
|
||||
so warnings emitted from validators inside DataDesigner internals attribute
|
||||
to the user, not to either library.
|
||||
"""
|
||||
from data_designer.config.utils.warning_helpers import DEFAULT_INTERNAL_PREFIXES
|
||||
|
||||
assert "pydantic" in DEFAULT_INTERNAL_PREFIXES
|
||||
assert "data_designer" in DEFAULT_INTERNAL_PREFIXES
|
||||
|
||||
|
||||
def test_warn_at_caller_dedup_keys_on_user_call_site():
|
||||
"""Python's once-per-location dedup keys on (text, category, lineno) inside
|
||||
the *attributing* frame's ``__warningregistry__``. With proper user
|
||||
attribution, two distinct call sites in the user's file each emit a
|
||||
warning under ``default`` filtering, while a repeated call at the same
|
||||
site emits only the first.
|
||||
"""
|
||||
with warnings.catch_warnings(record=True) as caught:
|
||||
warnings.simplefilter("default", DeprecationWarning)
|
||||
warn_at_caller("dedup-test", DeprecationWarning) # site A
|
||||
warn_at_caller("dedup-test", DeprecationWarning) # site B
|
||||
|
||||
linenos = {w.lineno for w in caught}
|
||||
assert len(caught) == 2, [str(w.message) for w in caught]
|
||||
assert len(linenos) == 2, "Each call site should produce a distinct dedup key"
|
||||
|
|
@ -124,6 +124,43 @@ def test_no_default_does_not_emit_deprecation_warning(stub_foo_provider: ModelPr
|
|||
ModelProviderRegistry(providers=[stub_foo_provider])
|
||||
|
||||
|
||||
def test_explicit_default_none_does_not_emit_deprecation_warning(stub_foo_provider: ModelProvider) -> None:
|
||||
"""Pin the predicate tightening from PR #594 review: passing ``default=None``
|
||||
explicitly is semantically equivalent to omitting it (caller is opting *out*
|
||||
of a registry-level default), so the deprecation must NOT fire.
|
||||
"""
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error", DeprecationWarning)
|
||||
ModelProviderRegistry(providers=[stub_foo_provider], default=None)
|
||||
|
||||
|
||||
def test_explicit_default_warning_attributes_to_user_frame(
|
||||
stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider
|
||||
) -> None:
|
||||
"""Regression for PR #594 review (andreatgretel): the ``default=`` deprecation
|
||||
warning must attribute to the *user's* call site, not the pydantic-internal
|
||||
or ``data_designer`` library frame that emits it. Library-attributed
|
||||
``DeprecationWarning`` entries are silenced under Python's default
|
||||
``ignore::DeprecationWarning`` filter, so attribution determines whether
|
||||
the warning is actually visible.
|
||||
|
||||
Construction goes through ``resolve_model_provider_registry`` so the walk
|
||||
has to escape both pydantic (validator dispatch) and ``data_designer``
|
||||
(the helper that builds the registry) before landing on the test frame.
|
||||
"""
|
||||
with warnings.catch_warnings(record=True) as caught:
|
||||
warnings.simplefilter("always", DeprecationWarning)
|
||||
resolve_model_provider_registry([stub_foo_provider, stub_bar_provider], default_provider_name="bar")
|
||||
|
||||
matches = [w for w in caught if "ModelProviderRegistry.default is deprecated" in str(w.message)]
|
||||
assert len(matches) == 1, [str(w.message) for w in caught]
|
||||
assert matches[0].filename == __file__, (
|
||||
f"Warning attributed to {matches[0].filename!r} (line {matches[0].lineno}) "
|
||||
f"instead of the test file. Library-attributed DeprecationWarnings are "
|
||||
f"silenced under default filters."
|
||||
)
|
||||
|
||||
|
||||
def test_resolve_single_provider_quiet_under_deprecation(stub_foo_provider: ModelProvider) -> None:
|
||||
"""Pin the q3 tweak: ``resolve_model_provider_registry`` skips ``default=``
|
||||
in the single-provider case so common construction paths stay quiet under
|
||||
|
|
|
|||
Loading…
Reference in a new issue