chore: lazy 3rd party imports (#222)

This commit is contained in:
Nabin Mulepati 2026-01-15 14:51:54 -07:00 committed by GitHub
parent 1ee37bc317
commit 7181db3eb7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
169 changed files with 1003 additions and 255 deletions

3
.gitignore vendored
View file

@ -93,3 +93,6 @@ docs/notebook_source/*.csv
docs/**/artifacts/
tests_e2e/uv.lock
# Performance profiling
perf_*.txt

145
AGENTS.md
View file

@ -158,12 +158,13 @@ Type annotations are REQUIRED for all code in this project. This is strictly enf
### Import Style
- **ALWAYS** use absolute imports, never relative imports
- Place imports at module level, not inside functions
- Place imports at module level, not inside functions (exception: when a function-level import is unavoidable for performance reasons)
- Import sorting is handled by `ruff`'s `isort` - imports should be grouped and sorted:
1. Standard library imports
2. Third-party imports
2. Third-party imports (use `lazy_heavy_imports` for heavy libraries)
3. First-party imports (`data_designer`)
- Use standard import conventions (enforced by `ICN`)
- See the [Lazy Loading and TYPE_CHECKING](#lazy-loading-and-type_checking) section for import optimization guidelines
```python
# Good
@ -184,6 +185,146 @@ Type annotations are REQUIRED for all code in this project. This is strictly enf
path = Path(filename)
```
### Lazy Loading and TYPE_CHECKING
This project uses lazy loading for heavy third-party dependencies to optimize import performance.
#### When to Use Lazy Loading
**Heavy third-party libraries** (>100ms import cost) should be lazy-loaded via `lazy_heavy_imports.py`:
```python
# ❌ Don't import directly
import pandas as pd
import numpy as np
# ✅ Use lazy loading with IDE support
from typing import TYPE_CHECKING
from data_designer.lazy_heavy_imports import pd, np
if TYPE_CHECKING:
import pandas as pd # For IDE autocomplete and type hints
import numpy as np
```
This pattern provides:
- Runtime lazy loading (fast startup)
- Full IDE support (autocomplete, type hints)
- Type checker validation
**See [lazy_heavy_imports.py](src/data_designer/lazy_heavy_imports.py) for the current list of lazy-loaded libraries.**
#### Adding New Heavy Dependencies
If you add a new dependency with significant import cost (>100ms):
1. **Add to `lazy_heavy_imports.py`:**
```python
_LAZY_IMPORTS = {
# ... existing entries ...
"your_lib": "your_library_name",
}
```
2. **Update imports across codebase:**
```python
from typing import TYPE_CHECKING
from data_designer.lazy_heavy_imports import your_lib
if TYPE_CHECKING:
import your_library_name as your_lib # For IDE support
```
3. **Verify with performance test:**
```bash
make perf-import CLEAN=1
```
#### Using TYPE_CHECKING Blocks
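`TYPE_CHECKING` blocks hold imports that are only needed for type hints. Because `typing.TYPE_CHECKING` is `False` at runtime, these imports are never executed when the module is loaded, which avoids circular imports and reduces import time.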
**For internal data_designer imports:**
```python
from __future__ import annotations # Always include at top
from typing import TYPE_CHECKING
# Runtime imports
from pathlib import Path
from data_designer.config.base import ConfigBase
if TYPE_CHECKING:
# Type-only imports - only visible to type checkers
from data_designer.engine.models.facade import ModelFacade
def get_model(model: ModelFacade) -> str:
return model.name
```
**For lazy-loaded libraries (see pattern in "When to Use Lazy Loading" above):**
- Import from `lazy_heavy_imports` for runtime
- Add full import in `TYPE_CHECKING` block for IDE support
**Rules for TYPE_CHECKING:**
✅ **DO put in TYPE_CHECKING:**
- Internal `data_designer` imports used **only** in type hints
- Imports that would cause circular dependencies
- **Full imports of lazy-loaded libraries for IDE support** (e.g., `import pandas as pd` in addition to runtime `from data_designer.lazy_heavy_imports import pd`)
❌ **DON'T put in TYPE_CHECKING:**
- **Standard library imports** (`Path`, `Any`, `Callable`, `Literal`, `TypeAlias`, etc.)
- **Pydantic model types** used in field definitions (needed at runtime for validation)
- **Types used in discriminated unions** (Pydantic needs them at runtime)
- **Any import used at runtime** (instantiation, method calls, base classes, etc.)
**Examples:**
```python
# ✅ CORRECT - Lazy-loaded library with IDE support
from typing import TYPE_CHECKING
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd # IDE gets full type hints
def load_data(path: str) -> pd.DataFrame: # IDE understands pd.DataFrame
return pd.read_csv(path)
# ✅ CORRECT - Standard library NOT in TYPE_CHECKING
from pathlib import Path
from typing import Any
def process_file(path: Path) -> Any:
return path.read_text()
# ✅ CORRECT - Internal type-only import
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from data_designer.engine.models.facade import ModelFacade
def get_model(model: ModelFacade) -> str: # Only used in type hint
return model.name
# ❌ INCORRECT - Pydantic field type in TYPE_CHECKING
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from data_designer.config.models import ModelConfig # Wrong!
class MyConfig(BaseModel):
model: ModelConfig # Pydantic needs this at runtime!
# ✅ CORRECT - Pydantic field type at runtime
from data_designer.config.models import ModelConfig
class MyConfig(BaseModel):
model: ModelConfig
```
### Naming Conventions (PEP 8)
Follow PEP 8 naming conventions:

View file

@ -45,14 +45,25 @@ help:
@echo " check-license-headers - Check if all files have license headers"
@echo " update-license-headers - Add license headers to all files"
@echo ""
@echo "⚡ Performance:"
@echo " perf-import - Profile import time and show summary"
@echo " perf-import CLEAN=1 - Clean cache, then profile import time"
@echo " perf-import NOFILE=1 - Profile without writing to file (for CI)"
@echo ""
@echo "═════════════════════════════════════════════════════════════"
@echo "💡 Tip: Run 'make <command>' to execute any command above"
@echo ""
clean:
@echo "🧹 Cleaning up coverage reports and cache files..."
rm -rf htmlcov .coverage .pytest_cache
# clean-pycache: delete Python bytecode caches below the current directory
# (every __pycache__ directory and every *.pyc file). Errors from `find` are
# discarded and ignored (`2>/dev/null || true`), so the target does not fail
# on them.
clean-pycache:
@echo "🧹 Cleaning up Python cache files..."
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
find . -type f -name "*.pyc" -delete 2>/dev/null || true
@echo "✅ Cache cleaned!"
# clean: run clean-pycache first (prerequisite), then remove the coverage
# report directory (htmlcov), the .coverage data file and the pytest cache.
clean: clean-pycache
@echo "🧹 Cleaning up coverage reports and test cache..."
rm -rf htmlcov .coverage .pytest_cache
@echo "✅ Cleaned!"
coverage:
@echo "📊 Running tests with coverage analysis..."
@ -168,4 +179,34 @@ install-dev-notebooks:
$(call install-pre-commit-hooks)
@echo "✅ Dev + notebooks installation complete!"
.PHONY: clean coverage format format-check lint lint-fix test test-e2e test-run-tutorials test-run-recipes test-run-all-examples check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks
# perf-import: profile the import time of `data_designer.essentials` using
# `python -X importtime` (run through `uv run`).
#
# Optional make variables:
#   CLEAN=1   run the clean-pycache target first, so the profile is taken
#             without existing bytecode caches.
#   NOFILE=1  keep the profiler output in a shell variable and echo it in
#             full instead of saving it to perf_import_<YYYYmmdd_HHMMSS>.txt.
#
# In both modes stdout and stderr are captured together (2>&1) and a summary
# is printed:
#   - "Total": the cumulative column (field 5) of the LAST captured line,
#     divided by 1,000,000 for display in seconds.
#   - "Top 10 slowest imports": lines containing "import time:", sorted
#     numerically in descending order on the cumulative column, with the self
#     (field 3) and cumulative (field 5) columns divided the same way.
# NOTE(review): "Total" assumes the last captured line is an "import time:"
# line; any other output printed after the profile would skew it - confirm.
perf-import:
ifdef CLEAN
@$(MAKE) clean-pycache
endif
@echo "⚡ Profiling import time for data_designer.essentials..."
ifdef NOFILE
@PERF_OUTPUT=$$(uv run python -X importtime -c "import data_designer.essentials" 2>&1); \
echo "$$PERF_OUTPUT"; \
echo ""; \
echo "Summary:"; \
echo "$$PERF_OUTPUT" | tail -1 | awk '{printf " Total: %.3fs\n", $$5/1000000}'; \
echo ""; \
echo "💡 Top 10 slowest imports:"; \
printf "%-12s %-12s %s\n" "Self (s)" "Cumulative (s)" "Module"; \
printf "%-12s %-12s %s\n" "--------" "--------------" "------"; \
echo "$$PERF_OUTPUT" | grep "import time:" | sort -rn -k5 | head -10 | awk '{printf "%-12.3f %-12.3f %s", $$3/1000000, $$5/1000000, $$7; for(i=8;i<=NF;i++) printf " %s", $$i; printf "\n"}'
else
@PERF_FILE="perf_import_$$(date +%Y%m%d_%H%M%S).txt"; \
uv run python -X importtime -c "import data_designer.essentials" > "$$PERF_FILE" 2>&1; \
echo "📊 Import profile saved to $$PERF_FILE"; \
echo ""; \
echo "Summary:"; \
tail -1 "$$PERF_FILE" | awk '{printf " Total: %.3fs\n", $$5/1000000}'; \
echo ""; \
echo "💡 Top 10 slowest imports:"; \
printf "%-12s %-12s %s\n" "Self (s)" "Cumulative (s)" "Module"; \
printf "%-12s %-12s %s\n" "--------" "--------------" "------"; \
grep "import time:" "$$PERF_FILE" | sort -rn -k5 | head -10 | awk '{printf "%-12.3f %-12.3f %s", $$3/1000000, $$5/1000000, $$7; for(i=8;i<=NF;i++) printf " %s", $$i; printf "\n"}'
endif
.PHONY: clean clean-pycache coverage format format-check lint lint-fix test test-e2e test-run-tutorials test-run-recipes test-run-all-examples check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks perf-import

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
try:
from data_designer._version import __version__
except ImportError:

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.main import app, main
__all__ = ["app", "main"]

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import typer
from data_designer.cli.controllers.download_controller import DownloadController

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from rich.table import Table
from data_designer.cli.repositories.model_repository import ModelRepository

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.controllers.model_controller import ModelController
from data_designer.config.utils.constants import DATA_DESIGNER_HOME

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.controllers.provider_controller import ProviderController
from data_designer.config.utils.constants import DATA_DESIGNER_HOME

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import typer
from data_designer.cli.repositories.model_repository import ModelRepository

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.controllers.download_controller import DownloadController
from data_designer.cli.controllers.model_controller import ModelController
from data_designer.cli.controllers.provider_controller import ProviderController

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import subprocess
from pathlib import Path

View file

@ -1,7 +1,10 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from data_designer.cli.forms.model_builder import ModelFormBuilder
from data_designer.cli.repositories.model_repository import ModelRepository
@ -20,7 +23,9 @@ from data_designer.cli.ui import (
print_warning,
select_with_arrows,
)
from data_designer.config.models import ModelConfig
if TYPE_CHECKING:
from data_designer.config.models import ModelConfig
class ModelController:

View file

@ -1,8 +1,11 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import copy
from pathlib import Path
from typing import TYPE_CHECKING
from data_designer.cli.forms.provider_builder import ProviderFormBuilder
from data_designer.cli.repositories.model_repository import ModelRepository
@ -20,7 +23,9 @@ from data_designer.cli.ui import (
print_warning,
select_with_arrows,
)
from data_designer.engine.model_provider import ModelProvider
if TYPE_CHECKING:
from data_designer.engine.model_provider import ModelProvider
class ProviderController:

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.forms.builder import FormBuilder
from data_designer.cli.forms.field import Field, NumericField, SelectField, TextField, ValidationError
from data_designer.cli.forms.form import Form

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Generic, TypeVar

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC, abstractmethod
from collections.abc import Callable
from typing import Any, Generic, TypeVar

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing import Any
from data_designer.cli.forms.field import Field

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing import Any
from data_designer.cli.forms.builder import FormBuilder

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing import Any
from data_designer.cli.forms.builder import FormBuilder

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import typer
from data_designer.cli.commands import download, models, providers, reset

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.repositories.base import ConfigRepository
from data_designer.cli.repositories.model_repository import ModelRepository
from data_designer.cli.repositories.provider_repository import ProviderRepository

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Generic, TypeVar

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from pathlib import Path
from pydantic import BaseModel

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from pydantic import BaseModel
from data_designer.config.utils.constants import (

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from pathlib import Path
from pydantic import BaseModel

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.services.download_service import DownloadService
from data_designer.cli.services.model_service import ModelService
from data_designer.cli.services.provider_service import ProviderService

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import glob
import shutil
import subprocess

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.repositories.model_repository import ModelConfigRegistry, ModelRepository
from data_designer.config.models import ModelConfig

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository
from data_designer.config.models import ModelProvider

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from collections.abc import Callable
from prompt_toolkit import Application, prompt

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import shutil
import subprocess

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC
from enum import Enum

View file

@ -5,9 +5,8 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Literal
from typing import TYPE_CHECKING, Any, Literal
from pandas import Series
from pydantic import BaseModel, ConfigDict, create_model, field_validator, model_validator
from typing_extensions import Self, TypeAlias
@ -15,8 +14,12 @@ from data_designer.config.column_types import DataDesignerColumnType
from data_designer.config.sampler_params import SamplerType
from data_designer.config.utils.constants import EPSILON
from data_designer.config.utils.numerical_helpers import is_float, is_int, prepare_number_for_reporting
from data_designer.lazy_heavy_imports import pd
from data_designer.plugin_manager import PluginManager
if TYPE_CHECKING:
import pandas as pd
class MissingValue(str, Enum):
CALCULATION_FAILED = "--"
@ -314,7 +317,7 @@ class CategoricalHistogramData(BaseModel):
return self
@classmethod
def from_series(cls, series: Series) -> Self:
def from_series(cls, series: pd.Series) -> Self:
counts = series.value_counts()
return cls(categories=counts.index.tolist(), counts=counts.tolist())
@ -337,7 +340,7 @@ class CategoricalDistribution(BaseModel):
return str(v) if not is_int(v) else prepare_number_for_reporting(v, int)
@classmethod
def from_series(cls, series: Series) -> Self:
def from_series(cls, series: pd.Series) -> Self:
counts = series.value_counts()
return cls(
most_common_value=counts.index[0],
@ -368,7 +371,7 @@ class NumericalDistribution(BaseModel):
return prepare_number_for_reporting(v, int if is_int(v) else float)
@classmethod
def from_series(cls, series: Series) -> Self:
def from_series(cls, series: pd.Series) -> Self:
return cls(
min=series.min(skipna=True),
max=series.max(skipna=True),

View file

@ -1,19 +1,25 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from functools import cached_property
from pathlib import Path
from typing import Annotated
from typing import TYPE_CHECKING, Annotated
from pydantic import BaseModel, Field, field_validator
from data_designer.config.analysis.column_profilers import ColumnProfilerResultsT
from data_designer.config.analysis.column_statistics import ColumnStatisticsT
from data_designer.config.analysis.utils.reporting import ReportSection, generate_analysis_report
from data_designer.config.column_types import DataDesignerColumnType, get_column_display_order
from data_designer.config.analysis.utils.reporting import generate_analysis_report
from data_designer.config.column_types import get_column_display_order
from data_designer.config.utils.constants import EPSILON
from data_designer.config.utils.numerical_helpers import prepare_number_for_reporting
if TYPE_CHECKING:
from data_designer.config.analysis.utils.reporting import ReportSection
from data_designer.config.column_types import DataDesignerColumnType
class DatasetProfilerResults(BaseModel):
"""Container for complete dataset profiling and analysis results.

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.errors import DataDesignerError

View file

@ -14,7 +14,6 @@ from rich.rule import Rule
from rich.table import Column, Table
from rich.text import Text
from data_designer.config.analysis.column_statistics import CategoricalHistogramData
from data_designer.config.analysis.utils.errors import AnalysisReportError
from data_designer.config.column_types import (
DataDesignerColumnType,
@ -29,6 +28,7 @@ from data_designer.config.utils.visualization import (
)
if TYPE_CHECKING:
from data_designer.config.analysis.column_statistics import CategoricalHistogramData
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
HEADER_STYLE = "dim"

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Annotated, Literal

View file

@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing_extensions import TypeAlias

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from enum import Enum

View file

@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
import os

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.errors import DataDesignerError

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.config.analysis.column_profilers import JudgeScoreProfilerConfig
from data_designer.config.column_configs import (
EmbeddingColumnConfig,

View file

@ -6,13 +6,14 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Generic, Protocol, TypeVar
import pandas as pd
from data_designer.config.models import ModelConfig, ModelProvider
from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS
from data_designer.config.utils.info import InterfaceInfo
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
from data_designer.config.config_builder import DataDesignerConfigBuilder
from data_designer.config.preview_results import PreviewResults

View file

@ -1,13 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from enum import Enum
from pathlib import Path
from typing import Annotated, Any, Generic, Literal, TypeVar
from typing import TYPE_CHECKING, Annotated, Any, Generic, Literal, TypeVar
import numpy as np
from pydantic import BaseModel, Field, field_validator, model_validator
from typing_extensions import Self, TypeAlias
@ -20,6 +21,10 @@ from data_designer.config.utils.constants import (
MIN_TOP_P,
)
from data_designer.config.utils.io_helpers import smart_load_yaml
from data_designer.lazy_heavy_imports import np
if TYPE_CHECKING:
import numpy as np
logger = logging.getLogger(__name__)

View file

@ -3,12 +3,16 @@
from __future__ import annotations
import pandas as pd
from typing import TYPE_CHECKING
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
from data_designer.config.config_builder import DataDesignerConfigBuilder
from data_designer.config.dataset_metadata import DatasetMetadata
from data_designer.config.utils.visualization import WithRecordSamplerMixin
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
class PreviewResults(WithRecordSamplerMixin):

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import json
from abc import ABC
from enum import Enum

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from pydantic import Field, model_validator
from typing_extensions import Self

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC, abstractmethod
from enum import Enum

View file

@ -1,10 +1,11 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from enum import Enum
from typing import Literal
from __future__ import annotations
from enum import Enum
from typing import TYPE_CHECKING, Literal
import pandas as pd
from pydantic import Field, field_validator, model_validator
from typing_extensions import Self, TypeAlias
@ -16,6 +17,10 @@ from data_designer.config.utils.constants import (
MAX_AGE,
MIN_AGE,
)
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
class SamplerType(str, Enum):

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from enum import Enum
from pydantic import Field, model_validator

View file

@ -1,10 +1,11 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from abc import ABC
from typing import Literal
from __future__ import annotations
from abc import ABC
from typing import TYPE_CHECKING, Literal
import pandas as pd
from pydantic import BaseModel, ConfigDict, Field, field_validator
from pydantic.json_schema import SkipJsonSchema
from typing_extensions import Self
@ -14,6 +15,10 @@ from data_designer.config.utils.io_helpers import (
validate_dataset_file_path,
validate_path_contains_files_of_type,
)
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
class SeedSource(BaseModel, ABC):

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing import Annotated
from pydantic import Field

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import os
from enum import Enum
from pathlib import Path

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.errors import DataDesignerError

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC, abstractmethod
from enum import Enum
from typing import Literal, TypeVar

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import json
import logging
import os
@ -8,13 +10,16 @@ from datetime import date, datetime, timedelta
from decimal import Decimal
from numbers import Number
from pathlib import Path
from typing import Any
from typing import TYPE_CHECKING, Any
import numpy as np
import pandas as pd
import yaml
from data_designer.config.errors import InvalidFileFormatError, InvalidFilePathError
from data_designer.lazy_heavy_imports import np, pd
if TYPE_CHECKING:
import numpy as np
import pandas as pd
logger = logging.getLogger(__name__)

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import numbers
from numbers import Number
from typing import Any

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import inspect
from enum import Enum
from typing import Any, Literal, get_args, get_origin

View file

@ -10,8 +10,6 @@ from enum import Enum
from functools import cached_property
from typing import TYPE_CHECKING, Any
import numpy as np
import pandas as pd
from rich.console import Console, Group
from rich.padding import Padding
from rich.panel import Panel
@ -28,8 +26,12 @@ from data_designer.config.sampler_params import SamplerType
from data_designer.config.utils.code_lang import code_lang_to_syntax_lexer
from data_designer.config.utils.constants import NVIDIA_API_KEY_ENV_VAR_NAME, OPENAI_API_KEY_ENV_VAR_NAME
from data_designer.config.utils.errors import DatasetSampleDisplayError
from data_designer.lazy_heavy_imports import np, pd
if TYPE_CHECKING:
import numpy as np
import pandas as pd
from data_designer.config.config_builder import DataDesignerConfigBuilder
from data_designer.config.dataset_metadata import DatasetMetadata

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from enum import Enum
from typing import Any

View file

@ -5,8 +5,8 @@ from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING
import pandas as pd
from pydantic import BaseModel, model_validator
from typing_extensions import Self
@ -14,6 +14,10 @@ from data_designer.config.base import ConfigBase
from data_designer.config.column_configs import SingleColumnConfig
from data_designer.config.column_types import DataDesignerColumnType
from data_designer.engine.configurable_task import ConfigurableTask, TaskConfigT
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
logger = logging.getLogger(__name__)

View file

@ -5,19 +5,16 @@ from __future__ import annotations
import logging
import random
from typing import TYPE_CHECKING
from data_designer.config.analysis.column_profilers import (
JudgeScoreProfilerConfig,
JudgeScoreProfilerResults,
JudgeScoreSample,
JudgeScoreSummary,
)
from data_designer.config.analysis.column_statistics import (
CategoricalDistribution,
CategoricalHistogramData,
ColumnDistributionType,
MissingValue,
NumericalDistribution,
)
from data_designer.config.column_types import DataDesignerColumnType
from data_designer.engine.analysis.column_profilers.base import ColumnConfigWithDataFrame, ColumnProfiler
@ -25,9 +22,17 @@ from data_designer.engine.analysis.utils.judge_score_processing import (
extract_judge_score_distributions,
sample_scores_and_reasoning,
)
from data_designer.engine.models.facade import ModelFacade
from data_designer.engine.models.recipes.response_recipes import TextResponseRecipe
if TYPE_CHECKING:
from data_designer.config.analysis.column_profilers import JudgeScoreSample
from data_designer.config.analysis.column_statistics import (
CategoricalDistribution,
CategoricalHistogramData,
NumericalDistribution,
)
from data_designer.engine.models.facade import ModelFacade
logger = logging.getLogger(__name__)

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.config.analysis.column_profilers import ColumnProfilerType
from data_designer.config.base import ConfigBase
from data_designer.engine.analysis.column_profilers.base import ColumnProfiler

View file

@ -4,9 +4,8 @@
from __future__ import annotations
import logging
from typing import Any, TypeAlias
from typing import TYPE_CHECKING, Any, TypeAlias
import pandas as pd
from pydantic import BaseModel
from typing_extensions import Self
@ -25,6 +24,10 @@ from data_designer.engine.analysis.utils.column_statistics_calculations import (
calculate_token_stats,
calculate_validation_column_info,
)
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
logger = logging.getLogger(__name__)

View file

@ -1,12 +1,13 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
from collections.abc import Sequence
from functools import cached_property
from typing import TYPE_CHECKING
import pandas as pd
import pyarrow as pa
from pydantic import Field, field_validator
from data_designer.config.analysis.column_profilers import ColumnProfilerConfigT
@ -21,6 +22,11 @@ from data_designer.engine.analysis.utils.column_statistics_calculations import h
from data_designer.engine.dataset_builders.multi_column_configs import DatasetBuilderColumnConfigT, MultiColumnConfig
from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry
from data_designer.engine.resources.resource_provider import ResourceProvider
from data_designer.lazy_heavy_imports import pa, pd
if TYPE_CHECKING:
import pandas as pd
import pyarrow as pa
logger = logging.getLogger(__name__)

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.errors import DataDesignerError

View file

@ -5,11 +5,8 @@ from __future__ import annotations
import logging
from numbers import Number
from typing import Any
from typing import TYPE_CHECKING, Any
import numpy as np
import pandas as pd
import pyarrow as pa
import tiktoken
from data_designer.config.analysis.column_statistics import (
@ -26,6 +23,12 @@ from data_designer.engine.column_generators.utils.prompt_renderer import (
RecordBasedPromptRenderer,
create_response_recipe,
)
from data_designer.lazy_heavy_imports import np, pa, pd
if TYPE_CHECKING:
import numpy as np
import pandas as pd
import pyarrow as pa
RANDOM_SEED = 42
MAX_PROMPT_SAMPLE_SIZE = 1000

View file

@ -1,11 +1,11 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
from collections import defaultdict
from typing import Any
import pandas as pd
from typing import TYPE_CHECKING, Any
from data_designer.config.analysis.column_profilers import JudgeScoreDistributions, JudgeScoreSample
from data_designer.config.analysis.column_statistics import (
@ -15,6 +15,10 @@ from data_designer.config.analysis.column_statistics import (
NumericalDistribution,
)
from data_designer.config.column_configs import LLMJudgeColumnConfig
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
logger = logging.getLogger(__name__)

View file

@ -9,16 +9,16 @@ from abc import ABC, abstractmethod
from enum import Enum
from typing import TYPE_CHECKING, overload
import pandas as pd
from data_designer.engine.configurable_task import ConfigurableTask, DataT, TaskConfigT
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
from data_designer.config.models import BaseInferenceParams, ModelConfig
from data_designer.engine.models.facade import ModelFacade
from data_designer.engine.models.registry import ModelRegistry
logger = logging.getLogger(__name__)

View file

@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from pydantic import BaseModel, computed_field

View file

@ -4,14 +4,17 @@
from __future__ import annotations
import logging
import pandas as pd
from typing import TYPE_CHECKING
from data_designer.config.column_configs import ExpressionColumnConfig
from data_designer.engine.column_generators.generators.base import ColumnGeneratorFullColumn
from data_designer.engine.column_generators.utils.errors import ExpressionTemplateRenderError
from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering
from data_designer.engine.processing.utils import deserialize_json_values
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
logger = logging.getLogger(__name__)

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import functools
import logging

View file

@ -6,9 +6,7 @@ from __future__ import annotations
import logging
import random
from functools import partial
from typing import Callable
import pandas as pd
from typing import TYPE_CHECKING, Callable
from data_designer.config.utils.constants import LOCALES_WITH_MANAGED_DATASETS
from data_designer.engine.column_generators.generators.base import FromScratchColumnGenerator, GenerationStrategy
@ -18,6 +16,10 @@ from data_designer.engine.resources.managed_dataset_generator import ManagedData
from data_designer.engine.sampling_gen.data_sources.sources import SamplerType
from data_designer.engine.sampling_gen.entities.person import load_person_data_sampler
from data_designer.engine.sampling_gen.generator import DatasetGenerator as SamplingDatasetGenerator
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
logger = logging.getLogger(__name__)

View file

@ -1,20 +1,22 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import functools
import logging
import duckdb
import pandas as pd
from typing import TYPE_CHECKING
from data_designer.config.seed import IndexRange, PartitionBlock, SamplingStrategy
from data_designer.engine.column_generators.generators.base import FromScratchColumnGenerator, GenerationStrategy
from data_designer.engine.column_generators.utils.errors import SeedDatasetError
from data_designer.engine.dataset_builders.multi_column_configs import SeedDatasetMultiColumnConfig
from data_designer.engine.processing.utils import concat_datasets
from data_designer.lazy_heavy_imports import duckdb, pd
if TYPE_CHECKING:
import duckdb
import pandas as pd
MAX_ZERO_RECORD_RESPONSE_FACTOR = 2

View file

@ -4,8 +4,7 @@
from __future__ import annotations
import logging
import pandas as pd
from typing import TYPE_CHECKING
from data_designer.config.column_configs import ValidationColumnConfig
from data_designer.config.errors import InvalidConfigError
@ -22,6 +21,10 @@ from data_designer.engine.validators import (
SQLValidator,
ValidationResult,
)
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
logger = logging.getLogger(__name__)

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.config.base import ConfigBase
from data_designer.config.column_configs import (
EmbeddingColumnConfig,

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.engine.errors import DataDesignerError

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.config.column_types import DataDesignerColumnType
from data_designer.config.utils.type_helpers import resolve_string_enum
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModelRegistry

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from enum import Enum
from pydantic import BaseModel, ConfigDict, Field, create_model

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import json
import logging

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
from data_designer.config.column_configs import SeedDatasetColumnConfig

View file

@ -1,15 +1,19 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC
from pathlib import Path
from typing import Generic, TypeVar, get_origin
import pandas as pd
from typing import TYPE_CHECKING, Generic, TypeVar, get_origin
from data_designer.config.base import ConfigBase
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
from data_designer.engine.resources.resource_provider import ResourceProvider
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
DataT = TypeVar("DataT", dict, pd.DataFrame)
TaskConfigT = TypeVar("ConfigT", bound=ConfigBase)

View file

@ -1,19 +1,25 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import json
import logging
import shutil
from datetime import datetime
from functools import cached_property
from pathlib import Path
from typing import TYPE_CHECKING
import pandas as pd
from pydantic import BaseModel, field_validator, model_validator
from data_designer.config.utils.io_helpers import read_parquet_dataset
from data_designer.config.utils.type_helpers import StrEnum, resolve_string_enum
from data_designer.engine.dataset_builders.errors import ArtifactStorageError
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
logger = logging.getLogger(__name__)

View file

@ -12,8 +12,6 @@ import uuid
from pathlib import Path
from typing import TYPE_CHECKING, Callable
import pandas as pd
from data_designer.config.column_types import ColumnConfigT
from data_designer.config.dataset_builders import BuildStage
from data_designer.config.processors import (
@ -40,14 +38,16 @@ from data_designer.engine.processing.processors.base import Processor
from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor
from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry
from data_designer.engine.resources.resource_provider import ResourceProvider
from data_designer.lazy_heavy_imports import pd
if TYPE_CHECKING:
import pandas as pd
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModelRegistry
from data_designer.engine.models.usage import ModelUsageStats
logger = logging.getLogger(__name__)
_CLIENT_VERSION: str = importlib.metadata.version("data_designer")

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.engine.errors import DataDesignerError

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from abc import ABC
from typing import TypeAlias

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.config.column_types import DataDesignerColumnType
from data_designer.config.data_designer_config import DataDesignerConfig
from data_designer.config.processors import ProcessorConfig

View file

@ -1,13 +1,18 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import logging
from __future__ import annotations
import networkx as nx
import logging
from typing import TYPE_CHECKING
from data_designer.config.column_types import ColumnConfigT
from data_designer.engine.column_generators.utils.generator_classification import column_type_used_in_execution_dag
from data_designer.engine.dataset_builders.utils.errors import DAGCircularDependencyError
from data_designer.lazy_heavy_imports import nx
if TYPE_CHECKING:
import networkx as nx
logger = logging.getLogger(__name__)

View file

@ -1,16 +1,20 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
import shutil
from pathlib import Path
from typing import Callable, Container, Iterator
import pandas as pd
import pyarrow.parquet as pq
from typing import TYPE_CHECKING, Callable, Container, Iterator
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage, BatchStage
from data_designer.engine.dataset_builders.utils.errors import DatasetBatchManagementError
from data_designer.lazy_heavy_imports import pd, pq
if TYPE_CHECKING:
import pandas as pd
import pyarrow.parquet as pq
logger = logging.getLogger(__name__)

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.engine.errors import DataDesignerError

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from pydantic import BaseModel, Field
from data_designer.errors import DataDesignerError

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from functools import cached_property
from pydantic import BaseModel, field_validator, model_validator

View file

@ -6,25 +6,15 @@ from __future__ import annotations
import logging
from collections.abc import Callable
from functools import wraps
from typing import Any
from typing import TYPE_CHECKING, Any
from litellm.exceptions import (
APIConnectionError,
APIError,
AuthenticationError,
BadRequestError,
ContextWindowExceededError,
InternalServerError,
NotFoundError,
PermissionDeniedError,
RateLimitError,
Timeout,
UnprocessableEntityError,
UnsupportedParamsError,
)
from pydantic import BaseModel
from data_designer.engine.errors import DataDesignerError
from data_designer.lazy_heavy_imports import litellm
if TYPE_CHECKING:
import litellm
logger = logging.getLogger(__name__)
@ -132,10 +122,10 @@ def handle_llm_exceptions(
err_msg_parser = DownstreamLLMExceptionMessageParser(model_name, model_provider_name, purpose)
match exception:
# Common errors that can come from LiteLLM
case APIError():
case litellm.exceptions.APIError():
raise err_msg_parser.parse_api_error(exception, authentication_error) from None
case APIConnectionError():
case litellm.exceptions.APIConnectionError():
raise ModelAPIConnectionError(
FormattedLLMErrorMessage(
cause=f"Connection to model {model_name!r} hosted on model provider {model_provider_name!r} failed while {purpose}.",
@ -143,13 +133,13 @@ def handle_llm_exceptions(
)
) from None
case AuthenticationError():
case litellm.exceptions.AuthenticationError():
raise ModelAuthenticationError(authentication_error) from None
case ContextWindowExceededError():
case litellm.exceptions.ContextWindowExceededError():
raise err_msg_parser.parse_context_window_exceeded_error(exception) from None
case UnsupportedParamsError():
case litellm.exceptions.UnsupportedParamsError():
raise ModelUnsupportedParamsError(
FormattedLLMErrorMessage(
cause=f"One or more of the parameters you provided were found to be unsupported by model {model_name!r} while {purpose}.",
@ -157,10 +147,10 @@ def handle_llm_exceptions(
)
) from None
case BadRequestError():
case litellm.exceptions.BadRequestError():
raise err_msg_parser.parse_bad_request_error(exception) from None
case InternalServerError():
case litellm.exceptions.InternalServerError():
raise ModelInternalServerError(
FormattedLLMErrorMessage(
cause=f"Model {model_name!r} is currently experiencing internal server issues while {purpose}.",
@ -168,7 +158,7 @@ def handle_llm_exceptions(
)
) from None
case NotFoundError():
case litellm.exceptions.NotFoundError():
raise ModelNotFoundError(
FormattedLLMErrorMessage(
cause=f"The specified model {model_name!r} could not be found while {purpose}.",
@ -176,7 +166,7 @@ def handle_llm_exceptions(
)
) from None
case PermissionDeniedError():
case litellm.exceptions.PermissionDeniedError():
raise ModelPermissionDeniedError(
FormattedLLMErrorMessage(
cause=f"Your API key was found to lack the necessary permissions to use model {model_name!r} while {purpose}.",
@ -184,7 +174,7 @@ def handle_llm_exceptions(
)
) from None
case RateLimitError():
case litellm.exceptions.RateLimitError():
raise ModelRateLimitError(
FormattedLLMErrorMessage(
cause=f"You have exceeded the rate limit for model {model_name!r} while {purpose}.",
@ -192,7 +182,7 @@ def handle_llm_exceptions(
)
) from None
case Timeout():
case litellm.exceptions.Timeout():
raise ModelTimeoutError(
FormattedLLMErrorMessage(
cause=f"The request to model {model_name!r} timed out while {purpose}.",
@ -200,7 +190,7 @@ def handle_llm_exceptions(
)
) from None
case UnprocessableEntityError():
case litellm.exceptions.UnprocessableEntityError():
raise ModelUnprocessableEntityError(
FormattedLLMErrorMessage(
cause=f"The request to model {model_name!r} failed despite correct request format while {purpose}.",
@ -264,7 +254,7 @@ class DownstreamLLMExceptionMessageParser:
self.model_provider_name = model_provider_name
self.purpose = purpose
def parse_bad_request_error(self, exception: BadRequestError) -> DataDesignerError:
def parse_bad_request_error(self, exception: litellm.exceptions.BadRequestError) -> DataDesignerError:
err_msg = FormattedLLMErrorMessage(
cause=f"The request for model {self.model_name!r} was found to be malformed or missing required parameters while {self.purpose}.",
solution="Check your request parameters and try again.",
@ -276,7 +266,9 @@ class DownstreamLLMExceptionMessageParser:
)
return ModelBadRequestError(err_msg)
def parse_context_window_exceeded_error(self, exception: ContextWindowExceededError) -> DataDesignerError:
def parse_context_window_exceeded_error(
self, exception: litellm.exceptions.ContextWindowExceededError
) -> DataDesignerError:
cause = f"The input data for model '{self.model_name}' was found to exceed its supported context width while {self.purpose}."
try:
if "OpenAIException - This model's maximum context length is " in str(exception):
@ -295,7 +287,7 @@ class DownstreamLLMExceptionMessageParser:
)
def parse_api_error(
self, exception: InternalServerError, auth_error_msg: FormattedLLMErrorMessage
self, exception: litellm.exceptions.InternalServerError, auth_error_msg: FormattedLLMErrorMessage
) -> DataDesignerError:
if "Error code: 403" in str(exception):
return ModelAuthenticationError(auth_error_msg)

View file

@ -6,10 +6,7 @@ from __future__ import annotations
import logging
from collections.abc import Callable
from copy import deepcopy
from typing import Any
from litellm.types.router import DeploymentTypedDict, LiteLLM_Params
from litellm.types.utils import EmbeddingResponse, ModelResponse
from typing import TYPE_CHECKING, Any
from data_designer.config.models import GenerationType, ModelConfig, ModelProvider
from data_designer.engine.model_provider import ModelProviderRegistry
@ -23,6 +20,10 @@ from data_designer.engine.models.parsers.errors import ParserException
from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats
from data_designer.engine.models.utils import prompt_to_messages, str_to_message
from data_designer.engine.secret_resolver import SecretResolver
from data_designer.lazy_heavy_imports import litellm
if TYPE_CHECKING:
import litellm
logger = logging.getLogger(__name__)
@ -65,7 +66,9 @@ class ModelFacade:
def usage_stats(self) -> ModelUsageStats:
return self._usage_stats
def completion(self, messages: list[dict[str, str]], skip_usage_tracking: bool = False, **kwargs) -> ModelResponse:
def completion(
self, messages: list[dict[str, str]], skip_usage_tracking: bool = False, **kwargs
) -> litellm.ModelResponse:
logger.debug(
f"Prompting model {self.model_name!r}...",
extra={"model": self.model_name, "messages": messages},
@ -236,14 +239,14 @@ class ModelFacade:
) from exc
return output_obj, reasoning_trace
def _get_litellm_deployment(self, model_config: ModelConfig) -> DeploymentTypedDict:
def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.DeploymentTypedDict:
provider = self._model_provider_registry.get_provider(model_config.provider)
api_key = None
if provider.api_key:
api_key = self._secret_resolver.resolve(provider.api_key)
api_key = api_key or "not-used-but-required"
litellm_params = LiteLLM_Params(
litellm_params = litellm.LiteLLM_Params(
model=f"{provider.provider_type}/{model_config.model}",
api_base=provider.endpoint,
api_key=api_key,
@ -253,7 +256,7 @@ class ModelFacade:
"litellm_params": litellm_params.model_dump(),
}
def _track_usage(self, response: ModelResponse | None) -> None:
def _track_usage(self, response: litellm.types.utils.ModelResponse | None) -> None:
if response is None:
self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1))
return
@ -270,7 +273,7 @@ class ModelFacade:
request_usage=RequestUsageStats(successful_requests=1, failed_requests=0),
)
def _track_usage_from_embedding(self, response: EmbeddingResponse | None) -> None:
def _track_usage_from_embedding(self, response: litellm.types.utils.EmbeddingResponse | None) -> None:
if response is None:
self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1))
return

View file

@ -0,0 +1,42 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing import TYPE_CHECKING
from data_designer.config.models import ModelConfig
from data_designer.engine.model_provider import ModelProviderRegistry
from data_designer.engine.secret_resolver import SecretResolver
if TYPE_CHECKING:
from data_designer.engine.models.registry import ModelRegistry
def create_model_registry(
    *,
    model_configs: list[ModelConfig] | None = None,
    secret_resolver: SecretResolver,
    model_provider_registry: ModelProviderRegistry,
) -> ModelRegistry:
    """Factory function for creating a ModelRegistry instance.

    Heavy dependencies (litellm, httpx) are deferred until this function is called.
    This is a factory function pattern - imports inside factories are idiomatic Python
    for lazy initialization.

    Args:
        model_configs: Model configurations handed to the registry; ``None`` is
            passed through unchanged.
        secret_resolver: Resolver handed to the registry and to every facade
            built by the factory.
        model_provider_registry: Provider registry handed to the registry and
            to every facade built by the factory.

    Returns:
        A ``ModelRegistry`` whose ``model_facade_factory`` builds ``ModelFacade``
        objects.
    """
    # Function-scope imports: these modules are only loaded when a registry is
    # actually requested.
    from data_designer.engine.models.facade import ModelFacade
    from data_designer.engine.models.litellm_overrides import apply_litellm_patches
    from data_designer.engine.models.registry import ModelRegistry

    # Patches are applied before the registry (and any facade) is constructed.
    apply_litellm_patches()

    # Parameter names are kept as-is so the factory can still be called with
    # either positional or keyword arguments.
    def model_facade_factory(
        model_config: ModelConfig,
        secret_resolver: SecretResolver,
        model_provider_registry: ModelProviderRegistry,
    ) -> ModelFacade:
        return ModelFacade(model_config, secret_resolver, model_provider_registry)

    return ModelRegistry(
        model_configs=model_configs,
        secret_resolver=secret_resolver,
        model_provider_registry=model_provider_registry,
        model_facade_factory=model_facade_factory,
    )

View file

@ -5,21 +5,26 @@ from __future__ import annotations
import random
import threading
from typing import TYPE_CHECKING
import httpx
import litellm
from litellm import RetryPolicy
from litellm.caching.in_memory_cache import InMemoryCache
from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager
from litellm.router import Router
from pydantic import BaseModel, Field
from typing_extensions import override
from data_designer.lazy_heavy_imports import httpx, litellm
from data_designer.logging import quiet_noisy_logger
if TYPE_CHECKING:
import httpx
import litellm
DEFAULT_MAX_CALLBACKS = 1000
def _get_logging_callback_manager() -> type:
    """Return litellm's ``LoggingCallbackManager`` class.

    The import is deferred until this function is called. The submodule is
    imported explicitly instead of being reached through attribute access on
    the top-level ``litellm`` object: a submodule is only bound as an attribute
    of its parent package once it has been imported, so the explicit import
    does not rely on some other code having loaded
    ``litellm.litellm_core_utils.logging_callback_manager`` first.

    Returns:
        The ``LoggingCallbackManager`` class object (not an instance).
    """
    from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager

    return LoggingCallbackManager
class LiteLLMRouterDefaultKwargs(BaseModel):
## Number of seconds to wait initially after a connection
## failure.
@ -35,15 +40,15 @@ class LiteLLMRouterDefaultKwargs(BaseModel):
## Sets the default retry policy, including the number
## of retries to use in particular scenarios.
retry_policy: RetryPolicy = Field(
default_factory=lambda: RetryPolicy(
retry_policy: litellm.RetryPolicy = Field(
default_factory=lambda: litellm.RetryPolicy(
RateLimitErrorRetries=3,
TimeoutErrorRetries=3,
)
)
class ThreadSafeCache(InMemoryCache):
class ThreadSafeCache(litellm.caching.in_memory_cache.InMemoryCache):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@ -78,7 +83,7 @@ class ThreadSafeCache(InMemoryCache):
super().flush_cache()
class CustomRouter(Router):
class CustomRouter(litellm.router.Router):
def __init__(
self,
*args,
@ -155,7 +160,7 @@ def apply_litellm_patches():
litellm.in_memory_llm_clients_cache = ThreadSafeCache()
# Workaround for the litellm issue described in https://github.com/BerriAI/litellm/issues/9792
LoggingCallbackManager.MAX_CALLBACKS = DEFAULT_MAX_CALLBACKS
_get_logging_callback_manager().MAX_CALLBACKS = DEFAULT_MAX_CALLBACKS
quiet_noisy_logger("httpx")
quiet_noisy_logger("LiteLLM")

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
class ParserException(Exception):
"""Identifies errors resulting from generic parser errors.

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from functools import reduce
import marko
@ -80,13 +82,11 @@ class LLMResponseParser:
code: str
syntax: Optional[str] = None
class CodeBlockParser:
def __call__(self, element: _Element) -> CodeBlock:
# Implementation details...
return CodeBlock(code=element.text, syntax=element.get("class"))
parser = LLMResponseParser(
tag_parsers={
"pre.code": CodeBlockParser(),

View file

@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import json_repair
from pydantic import BaseModel, ValidationError

Some files were not shown because too many files have changed in this diff Show more