mirror of
https://github.com/NVIDIA-NeMo/DataDesigner
synced 2026-05-24 09:48:29 +00:00
chore: lazy 3rd party imports (#222)
This commit is contained in:
parent
1ee37bc317
commit
7181db3eb7
169 changed files with 1003 additions and 255 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -93,3 +93,6 @@ docs/notebook_source/*.csv
|
|||
docs/**/artifacts/
|
||||
|
||||
tests_e2e/uv.lock
|
||||
|
||||
# Performance profiling
|
||||
perf_*.txt
|
||||
|
|
|
|||
145
AGENTS.md
145
AGENTS.md
|
|
@ -158,12 +158,13 @@ Type annotations are REQUIRED for all code in this project. This is strictly enf
|
|||
### Import Style
|
||||
|
||||
- **ALWAYS** use absolute imports, never relative imports
|
||||
- Place imports at module level, not inside functions
|
||||
- Place imports at module level, not inside functions (exception: when a function-level import is unavoidable for performance reasons)
|
||||
- Import sorting is handled by `ruff`'s `isort` - imports should be grouped and sorted:
|
||||
1. Standard library imports
|
||||
2. Third-party imports
|
||||
2. Third-party imports (use `lazy_heavy_imports` for heavy libraries)
|
||||
3. First-party imports (`data_designer`)
|
||||
- Use standard import conventions (enforced by `ICN`)
|
||||
- See the [Lazy Loading and TYPE_CHECKING](#lazy-loading-and-type_checking) section for optimization guidelines
|
||||
|
||||
```python
|
||||
# Good
|
||||
|
|
@ -184,6 +185,146 @@ Type annotations are REQUIRED for all code in this project. This is strictly enf
|
|||
path = Path(filename)
|
||||
```
|
||||
|
||||
### Lazy Loading and TYPE_CHECKING
|
||||
|
||||
This project uses lazy loading for heavy third-party dependencies to optimize import performance.
|
||||
|
||||
#### When to Use Lazy Loading
|
||||
|
||||
**Heavy third-party libraries** (>100ms import cost) should be lazy-loaded via `lazy_heavy_imports.py`:
|
||||
|
||||
```python
|
||||
# ❌ Don't import directly
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# ✅ Use lazy loading with IDE support
|
||||
from typing import TYPE_CHECKING
|
||||
from data_designer.lazy_heavy_imports import np, pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd # For IDE autocomplete and type hints
|
||||
import numpy as np
|
||||
```
|
||||
|
||||
This pattern provides:
|
||||
- Runtime lazy loading (fast startup)
|
||||
- Full IDE support (autocomplete, type hints)
|
||||
- Type checker validation
|
||||
|
||||
**See [lazy_heavy_imports.py](src/data_designer/lazy_heavy_imports.py) for the current list of lazy-loaded libraries.**
|
||||
|
||||
#### Adding New Heavy Dependencies
|
||||
|
||||
If you add a new dependency with significant import cost (>100ms):
|
||||
|
||||
1. **Add to `lazy_heavy_imports.py`:**
|
||||
```python
|
||||
_LAZY_IMPORTS = {
|
||||
# ... existing entries ...
|
||||
"your_lib": "your_library_name",
|
||||
}
|
||||
```
|
||||
|
||||
2. **Update imports across codebase:**
|
||||
```python
|
||||
from typing import TYPE_CHECKING
|
||||
from data_designer.lazy_heavy_imports import your_lib
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import your_library_name as your_lib # For IDE support
|
||||
```
|
||||
|
||||
3. **Verify with performance test:**
|
||||
```bash
|
||||
make perf-import CLEAN=1
|
||||
```
|
||||
|
||||
#### Using TYPE_CHECKING Blocks
|
||||
|
||||
`TYPE_CHECKING` blocks defer imports that are only needed for type hints, preventing circular dependencies and reducing import time.
|
||||
|
||||
**For internal data_designer imports:**
|
||||
|
||||
```python
|
||||
from __future__ import annotations # Always include at top
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Runtime imports
|
||||
from pathlib import Path
|
||||
from data_designer.config.base import ConfigBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# Type-only imports - only visible to type checkers
|
||||
from data_designer.engine.models.facade import ModelFacade
|
||||
|
||||
def get_model(model: ModelFacade) -> str:
|
||||
return model.name
|
||||
```
|
||||
|
||||
**For lazy-loaded libraries (see pattern in "When to Use Lazy Loading" above):**
|
||||
- Import from `lazy_heavy_imports` for runtime
|
||||
- Add the full import in a `TYPE_CHECKING` block for IDE support
|
||||
|
||||
**Rules for TYPE_CHECKING:**
|
||||
|
||||
✅ **DO put in TYPE_CHECKING:**
|
||||
- Internal `data_designer` imports used **only** in type hints
|
||||
- Imports that would cause circular dependencies
|
||||
- **Full imports of lazy-loaded libraries for IDE support** (e.g., `import pandas as pd` in addition to runtime `from data_designer.lazy_heavy_imports import pd`)
|
||||
|
||||
❌ **DON'T put in TYPE_CHECKING:**
|
||||
- **Standard library imports** (`Path`, `Any`, `Callable`, `Literal`, `TypeAlias`, etc.)
|
||||
- **Pydantic model types** used in field definitions (needed at runtime for validation)
|
||||
- **Types used in discriminated unions** (Pydantic needs them at runtime)
|
||||
- **Any import used at runtime** (instantiation, method calls, base classes, etc.)
|
||||
|
||||
**Examples:**
|
||||
|
||||
```python
|
||||
# ✅ CORRECT - Lazy-loaded library with IDE support
|
||||
from typing import TYPE_CHECKING
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd # IDE gets full type hints
|
||||
|
||||
def load_data(path: str) -> pd.DataFrame: # IDE understands pd.DataFrame
|
||||
return pd.read_csv(path)
|
||||
|
||||
# ✅ CORRECT - Standard library NOT in TYPE_CHECKING
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
def process_file(path: Path) -> Any:
|
||||
return path.read_text()
|
||||
|
||||
# ✅ CORRECT - Internal type-only import
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from data_designer.engine.models.facade import ModelFacade
|
||||
|
||||
def get_model(model: ModelFacade) -> str: # Only used in type hint
|
||||
return model.name
|
||||
|
||||
# ❌ INCORRECT - Pydantic field type in TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from data_designer.config.models import ModelConfig # Wrong!
|
||||
|
||||
class MyConfig(BaseModel):
|
||||
model: ModelConfig # Pydantic needs this at runtime!
|
||||
|
||||
# ✅ CORRECT - Pydantic field type at runtime
|
||||
from data_designer.config.models import ModelConfig
|
||||
|
||||
class MyConfig(BaseModel):
|
||||
model: ModelConfig
|
||||
```
|
||||
|
||||
### Naming Conventions (PEP 8)
|
||||
|
||||
Follow PEP 8 naming conventions:
|
||||
|
|
|
|||
49
Makefile
49
Makefile
|
|
@ -45,14 +45,25 @@ help:
|
|||
@echo " check-license-headers - Check if all files have license headers"
|
||||
@echo " update-license-headers - Add license headers to all files"
|
||||
@echo ""
|
||||
@echo "⚡ Performance:"
|
||||
@echo " perf-import - Profile import time and show summary"
|
||||
@echo " perf-import CLEAN=1 - Clean cache, then profile import time"
|
||||
@echo " perf-import NOFILE=1 - Profile without writing to file (for CI)"
|
||||
@echo ""
|
||||
@echo "═════════════════════════════════════════════════════════════"
|
||||
@echo "💡 Tip: Run 'make <command>' to execute any command above"
|
||||
@echo ""
|
||||
|
||||
clean:
|
||||
@echo "🧹 Cleaning up coverage reports and cache files..."
|
||||
rm -rf htmlcov .coverage .pytest_cache
|
||||
clean-pycache:
|
||||
@echo "🧹 Cleaning up Python cache files..."
|
||||
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
|
||||
find . -type f -name "*.pyc" -delete 2>/dev/null || true
|
||||
@echo "✅ Cache cleaned!"
|
||||
|
||||
clean: clean-pycache
|
||||
@echo "🧹 Cleaning up coverage reports and test cache..."
|
||||
rm -rf htmlcov .coverage .pytest_cache
|
||||
@echo "✅ Cleaned!"
|
||||
|
||||
coverage:
|
||||
@echo "📊 Running tests with coverage analysis..."
|
||||
|
|
@ -168,4 +179,34 @@ install-dev-notebooks:
|
|||
$(call install-pre-commit-hooks)
|
||||
@echo "✅ Dev + notebooks installation complete!"
|
||||
|
||||
.PHONY: clean coverage format format-check lint lint-fix test test-e2e test-run-tutorials test-run-recipes test-run-all-examples check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks
|
||||
perf-import:
|
||||
ifdef CLEAN
|
||||
@$(MAKE) clean-pycache
|
||||
endif
|
||||
@echo "⚡ Profiling import time for data_designer.essentials..."
|
||||
ifdef NOFILE
|
||||
@PERF_OUTPUT=$$(uv run python -X importtime -c "import data_designer.essentials" 2>&1); \
|
||||
echo "$$PERF_OUTPUT"; \
|
||||
echo ""; \
|
||||
echo "Summary:"; \
|
||||
echo "$$PERF_OUTPUT" | tail -1 | awk '{printf " Total: %.3fs\n", $$5/1000000}'; \
|
||||
echo ""; \
|
||||
echo "💡 Top 10 slowest imports:"; \
|
||||
printf "%-12s %-12s %s\n" "Self (s)" "Cumulative (s)" "Module"; \
|
||||
printf "%-12s %-12s %s\n" "--------" "--------------" "------"; \
|
||||
echo "$$PERF_OUTPUT" | grep "import time:" | sort -rn -k5 | head -10 | awk '{printf "%-12.3f %-12.3f %s", $$3/1000000, $$5/1000000, $$7; for(i=8;i<=NF;i++) printf " %s", $$i; printf "\n"}'
|
||||
else
|
||||
@PERF_FILE="perf_import_$$(date +%Y%m%d_%H%M%S).txt"; \
|
||||
uv run python -X importtime -c "import data_designer.essentials" > "$$PERF_FILE" 2>&1; \
|
||||
echo "📊 Import profile saved to $$PERF_FILE"; \
|
||||
echo ""; \
|
||||
echo "Summary:"; \
|
||||
tail -1 "$$PERF_FILE" | awk '{printf " Total: %.3fs\n", $$5/1000000}'; \
|
||||
echo ""; \
|
||||
echo "💡 Top 10 slowest imports:"; \
|
||||
printf "%-12s %-12s %s\n" "Self (s)" "Cumulative (s)" "Module"; \
|
||||
printf "%-12s %-12s %s\n" "--------" "--------------" "------"; \
|
||||
grep "import time:" "$$PERF_FILE" | sort -rn -k5 | head -10 | awk '{printf "%-12.3f %-12.3f %s", $$3/1000000, $$5/1000000, $$7; for(i=8;i<=NF;i++) printf " %s", $$i; printf "\n"}'
|
||||
endif
|
||||
|
||||
.PHONY: clean clean-pycache coverage format format-check lint lint-fix test test-e2e test-run-tutorials test-run-recipes test-run-all-examples check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks perf-import
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
try:
|
||||
from data_designer._version import __version__
|
||||
except ImportError:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.main import app, main
|
||||
|
||||
__all__ = ["app", "main"]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from data_designer.cli.controllers.download_controller import DownloadController
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from rich.table import Table
|
||||
|
||||
from data_designer.cli.repositories.model_repository import ModelRepository
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.controllers.model_controller import ModelController
|
||||
from data_designer.config.utils.constants import DATA_DESIGNER_HOME
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.controllers.provider_controller import ProviderController
|
||||
from data_designer.config.utils.constants import DATA_DESIGNER_HOME
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from data_designer.cli.repositories.model_repository import ModelRepository
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.controllers.download_controller import DownloadController
|
||||
from data_designer.cli.controllers.model_controller import ModelController
|
||||
from data_designer.cli.controllers.provider_controller import ProviderController
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.cli.forms.model_builder import ModelFormBuilder
|
||||
from data_designer.cli.repositories.model_repository import ModelRepository
|
||||
|
|
@ -20,7 +23,9 @@ from data_designer.cli.ui import (
|
|||
print_warning,
|
||||
select_with_arrows,
|
||||
)
|
||||
from data_designer.config.models import ModelConfig
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from data_designer.config.models import ModelConfig
|
||||
|
||||
|
||||
class ModelController:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.cli.forms.provider_builder import ProviderFormBuilder
|
||||
from data_designer.cli.repositories.model_repository import ModelRepository
|
||||
|
|
@ -20,7 +23,9 @@ from data_designer.cli.ui import (
|
|||
print_warning,
|
||||
select_with_arrows,
|
||||
)
|
||||
from data_designer.engine.model_provider import ModelProvider
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from data_designer.engine.model_provider import ModelProvider
|
||||
|
||||
|
||||
class ProviderController:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.forms.builder import FormBuilder
|
||||
from data_designer.cli.forms.field import Field, NumericField, SelectField, TextField, ValidationError
|
||||
from data_designer.cli.forms.form import Form
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Generic, TypeVar
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from typing import Any, Generic, TypeVar
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from data_designer.cli.forms.field import Field
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from data_designer.cli.forms.builder import FormBuilder
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from data_designer.cli.forms.builder import FormBuilder
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from data_designer.cli.commands import download, models, providers, reset
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.repositories.base import ConfigRepository
|
||||
from data_designer.cli.repositories.model_repository import ModelRepository
|
||||
from data_designer.cli.repositories.provider_repository import ProviderRepository
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Generic, TypeVar
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from data_designer.config.utils.constants import (
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.services.download_service import DownloadService
|
||||
from data_designer.cli.services.model_service import ModelService
|
||||
from data_designer.cli.services.provider_service import ProviderService
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import glob
|
||||
import shutil
|
||||
import subprocess
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.repositories.model_repository import ModelConfigRegistry, ModelRepository
|
||||
from data_designer.config.models import ModelConfig
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository
|
||||
from data_designer.config.models import ModelProvider
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
|
||||
from prompt_toolkit import Application, prompt
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC
|
||||
from enum import Enum
|
||||
|
||||
|
|
|
|||
|
|
@ -5,9 +5,8 @@ from __future__ import annotations
|
|||
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Any, Literal
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
from pandas import Series
|
||||
from pydantic import BaseModel, ConfigDict, create_model, field_validator, model_validator
|
||||
from typing_extensions import Self, TypeAlias
|
||||
|
||||
|
|
@ -15,8 +14,12 @@ from data_designer.config.column_types import DataDesignerColumnType
|
|||
from data_designer.config.sampler_params import SamplerType
|
||||
from data_designer.config.utils.constants import EPSILON
|
||||
from data_designer.config.utils.numerical_helpers import is_float, is_int, prepare_number_for_reporting
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
from data_designer.plugin_manager import PluginManager
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class MissingValue(str, Enum):
|
||||
CALCULATION_FAILED = "--"
|
||||
|
|
@ -314,7 +317,7 @@ class CategoricalHistogramData(BaseModel):
|
|||
return self
|
||||
|
||||
@classmethod
|
||||
def from_series(cls, series: Series) -> Self:
|
||||
def from_series(cls, series: pd.Series) -> Self:
|
||||
counts = series.value_counts()
|
||||
return cls(categories=counts.index.tolist(), counts=counts.tolist())
|
||||
|
||||
|
|
@ -337,7 +340,7 @@ class CategoricalDistribution(BaseModel):
|
|||
return str(v) if not is_int(v) else prepare_number_for_reporting(v, int)
|
||||
|
||||
@classmethod
|
||||
def from_series(cls, series: Series) -> Self:
|
||||
def from_series(cls, series: pd.Series) -> Self:
|
||||
counts = series.value_counts()
|
||||
return cls(
|
||||
most_common_value=counts.index[0],
|
||||
|
|
@ -368,7 +371,7 @@ class NumericalDistribution(BaseModel):
|
|||
return prepare_number_for_reporting(v, int if is_int(v) else float)
|
||||
|
||||
@classmethod
|
||||
def from_series(cls, series: Series) -> Self:
|
||||
def from_series(cls, series: pd.Series) -> Self:
|
||||
return cls(
|
||||
min=series.min(skipna=True),
|
||||
max=series.max(skipna=True),
|
||||
|
|
|
|||
|
|
@ -1,19 +1,25 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
from typing import TYPE_CHECKING, Annotated
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from data_designer.config.analysis.column_profilers import ColumnProfilerResultsT
|
||||
from data_designer.config.analysis.column_statistics import ColumnStatisticsT
|
||||
from data_designer.config.analysis.utils.reporting import ReportSection, generate_analysis_report
|
||||
from data_designer.config.column_types import DataDesignerColumnType, get_column_display_order
|
||||
from data_designer.config.analysis.utils.reporting import generate_analysis_report
|
||||
from data_designer.config.column_types import get_column_display_order
|
||||
from data_designer.config.utils.constants import EPSILON
|
||||
from data_designer.config.utils.numerical_helpers import prepare_number_for_reporting
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from data_designer.config.analysis.utils.reporting import ReportSection
|
||||
from data_designer.config.column_types import DataDesignerColumnType
|
||||
|
||||
|
||||
class DatasetProfilerResults(BaseModel):
|
||||
"""Container for complete dataset profiling and analysis results.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.errors import DataDesignerError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ from rich.rule import Rule
|
|||
from rich.table import Column, Table
|
||||
from rich.text import Text
|
||||
|
||||
from data_designer.config.analysis.column_statistics import CategoricalHistogramData
|
||||
from data_designer.config.analysis.utils.errors import AnalysisReportError
|
||||
from data_designer.config.column_types import (
|
||||
DataDesignerColumnType,
|
||||
|
|
@ -29,6 +28,7 @@ from data_designer.config.utils.visualization import (
|
|||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from data_designer.config.analysis.column_statistics import CategoricalHistogramData
|
||||
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
||||
|
||||
HEADER_STYLE = "dim"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Annotated, Literal
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.errors import DataDesignerError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.config.analysis.column_profilers import JudgeScoreProfilerConfig
|
||||
from data_designer.config.column_configs import (
|
||||
EmbeddingColumnConfig,
|
||||
|
|
|
|||
|
|
@ -6,13 +6,14 @@ from __future__ import annotations
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, Generic, Protocol, TypeVar
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from data_designer.config.models import ModelConfig, ModelProvider
|
||||
from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS
|
||||
from data_designer.config.utils.info import InterfaceInfo
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
||||
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
||||
from data_designer.config.preview_results import PreviewResults
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Any, Generic, Literal, TypeVar
|
||||
from typing import TYPE_CHECKING, Annotated, Any, Generic, Literal, TypeVar
|
||||
|
||||
import numpy as np
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
from typing_extensions import Self, TypeAlias
|
||||
|
||||
|
|
@ -20,6 +21,10 @@ from data_designer.config.utils.constants import (
|
|||
MIN_TOP_P,
|
||||
)
|
||||
from data_designer.config.utils.io_helpers import smart_load_yaml
|
||||
from data_designer.lazy_heavy_imports import np
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,12 +3,16 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
||||
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
||||
from data_designer.config.dataset_metadata import DatasetMetadata
|
||||
from data_designer.config.utils.visualization import WithRecordSamplerMixin
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class PreviewResults(WithRecordSamplerMixin):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from abc import ABC
|
||||
from enum import Enum
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import Field, model_validator
|
||||
from typing_extensions import Self
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
import pandas as pd
|
||||
from pydantic import Field, field_validator, model_validator
|
||||
from typing_extensions import Self, TypeAlias
|
||||
|
||||
|
|
@ -16,6 +17,10 @@ from data_designer.config.utils.constants import (
|
|||
MAX_AGE,
|
||||
MIN_AGE,
|
||||
)
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class SamplerType(str, Enum):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import Field, model_validator
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from abc import ABC
|
||||
from typing import Literal
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
import pandas as pd
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
from pydantic.json_schema import SkipJsonSchema
|
||||
from typing_extensions import Self
|
||||
|
|
@ -14,6 +15,10 @@ from data_designer.config.utils.io_helpers import (
|
|||
validate_dataset_file_path,
|
||||
validate_path_contains_files_of_type,
|
||||
)
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class SeedSource(BaseModel, ABC):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from pydantic import Field
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.errors import DataDesignerError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Literal, TypeVar
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -8,13 +10,16 @@ from datetime import date, datetime, timedelta
|
|||
from decimal import Decimal
|
||||
from numbers import Number
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import yaml
|
||||
|
||||
from data_designer.config.errors import InvalidFileFormatError, InvalidFilePathError
|
||||
from data_designer.lazy_heavy_imports import np, pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numbers
|
||||
from numbers import Number
|
||||
from typing import Any
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
from enum import Enum
|
||||
from typing import Any, Literal, get_args, get_origin
|
||||
|
|
|
|||
|
|
@ -10,8 +10,6 @@ from enum import Enum
|
|||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from rich.console import Console, Group
|
||||
from rich.padding import Padding
|
||||
from rich.panel import Panel
|
||||
|
|
@ -28,8 +26,12 @@ from data_designer.config.sampler_params import SamplerType
|
|||
from data_designer.config.utils.code_lang import code_lang_to_syntax_lexer
|
||||
from data_designer.config.utils.constants import NVIDIA_API_KEY_ENV_VAR_NAME, OPENAI_API_KEY_ENV_VAR_NAME
|
||||
from data_designer.config.utils.errors import DatasetSampleDisplayError
|
||||
from data_designer.lazy_heavy_imports import np, pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
||||
from data_designer.config.dataset_metadata import DatasetMetadata
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pandas as pd
|
||||
from pydantic import BaseModel, model_validator
|
||||
from typing_extensions import Self
|
||||
|
||||
|
|
@ -14,6 +14,10 @@ from data_designer.config.base import ConfigBase
|
|||
from data_designer.config.column_configs import SingleColumnConfig
|
||||
from data_designer.config.column_types import DataDesignerColumnType
|
||||
from data_designer.engine.configurable_task import ConfigurableTask, TaskConfigT
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -5,19 +5,16 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
import random
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.config.analysis.column_profilers import (
|
||||
JudgeScoreProfilerConfig,
|
||||
JudgeScoreProfilerResults,
|
||||
JudgeScoreSample,
|
||||
JudgeScoreSummary,
|
||||
)
|
||||
from data_designer.config.analysis.column_statistics import (
|
||||
CategoricalDistribution,
|
||||
CategoricalHistogramData,
|
||||
ColumnDistributionType,
|
||||
MissingValue,
|
||||
NumericalDistribution,
|
||||
)
|
||||
from data_designer.config.column_types import DataDesignerColumnType
|
||||
from data_designer.engine.analysis.column_profilers.base import ColumnConfigWithDataFrame, ColumnProfiler
|
||||
|
|
@ -25,9 +22,17 @@ from data_designer.engine.analysis.utils.judge_score_processing import (
|
|||
extract_judge_score_distributions,
|
||||
sample_scores_and_reasoning,
|
||||
)
|
||||
from data_designer.engine.models.facade import ModelFacade
|
||||
from data_designer.engine.models.recipes.response_recipes import TextResponseRecipe
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from data_designer.config.analysis.column_profilers import JudgeScoreSample
|
||||
from data_designer.config.analysis.column_statistics import (
|
||||
CategoricalDistribution,
|
||||
CategoricalHistogramData,
|
||||
NumericalDistribution,
|
||||
)
|
||||
from data_designer.engine.models.facade import ModelFacade
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.config.analysis.column_profilers import ColumnProfilerType
|
||||
from data_designer.config.base import ConfigBase
|
||||
from data_designer.engine.analysis.column_profilers.base import ColumnProfiler
|
||||
|
|
|
|||
|
|
@ -4,9 +4,8 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, TypeAlias
|
||||
from typing import TYPE_CHECKING, Any, TypeAlias
|
||||
|
||||
import pandas as pd
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import Self
|
||||
|
||||
|
|
@ -25,6 +24,10 @@ from data_designer.engine.analysis.utils.column_statistics_calculations import (
|
|||
calculate_token_stats,
|
||||
calculate_validation_column_info,
|
||||
)
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Sequence
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
from pydantic import Field, field_validator
|
||||
|
||||
from data_designer.config.analysis.column_profilers import ColumnProfilerConfigT
|
||||
|
|
@ -21,6 +22,11 @@ from data_designer.engine.analysis.utils.column_statistics_calculations import h
|
|||
from data_designer.engine.dataset_builders.multi_column_configs import DatasetBuilderColumnConfigT, MultiColumnConfig
|
||||
from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry
|
||||
from data_designer.engine.resources.resource_provider import ResourceProvider
|
||||
from data_designer.lazy_heavy_imports import pa, pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.errors import DataDesignerError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -5,11 +5,8 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
from numbers import Number
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import tiktoken
|
||||
|
||||
from data_designer.config.analysis.column_statistics import (
|
||||
|
|
@ -26,6 +23,12 @@ from data_designer.engine.column_generators.utils.prompt_renderer import (
|
|||
RecordBasedPromptRenderer,
|
||||
create_response_recipe,
|
||||
)
|
||||
from data_designer.lazy_heavy_imports import np, pa, pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
|
||||
RANDOM_SEED = 42
|
||||
MAX_PROMPT_SAMPLE_SIZE = 1000
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from data_designer.config.analysis.column_profilers import JudgeScoreDistributions, JudgeScoreSample
|
||||
from data_designer.config.analysis.column_statistics import (
|
||||
|
|
@ -15,6 +15,10 @@ from data_designer.config.analysis.column_statistics import (
|
|||
NumericalDistribution,
|
||||
)
|
||||
from data_designer.config.column_configs import LLMJudgeColumnConfig
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -9,16 +9,16 @@ from abc import ABC, abstractmethod
|
|||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, overload
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from data_designer.engine.configurable_task import ConfigurableTask, DataT, TaskConfigT
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
from data_designer.config.models import BaseInferenceParams, ModelConfig
|
||||
from data_designer.engine.models.facade import ModelFacade
|
||||
from data_designer.engine.models.registry import ModelRegistry
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel, computed_field
|
||||
|
||||
|
|
|
|||
|
|
@ -4,14 +4,17 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.config.column_configs import ExpressionColumnConfig
|
||||
from data_designer.engine.column_generators.generators.base import ColumnGeneratorFullColumn
|
||||
from data_designer.engine.column_generators.utils.errors import ExpressionTemplateRenderError
|
||||
from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering
|
||||
from data_designer.engine.processing.utils import deserialize_json_values
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import logging
|
||||
|
||||
|
|
|
|||
|
|
@ -6,9 +6,7 @@ from __future__ import annotations
|
|||
import logging
|
||||
import random
|
||||
from functools import partial
|
||||
from typing import Callable
|
||||
|
||||
import pandas as pd
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from data_designer.config.utils.constants import LOCALES_WITH_MANAGED_DATASETS
|
||||
from data_designer.engine.column_generators.generators.base import FromScratchColumnGenerator, GenerationStrategy
|
||||
|
|
@ -18,6 +16,10 @@ from data_designer.engine.resources.managed_dataset_generator import ManagedData
|
|||
from data_designer.engine.sampling_gen.data_sources.sources import SamplerType
|
||||
from data_designer.engine.sampling_gen.entities.person import load_person_data_sampler
|
||||
from data_designer.engine.sampling_gen.generator import DatasetGenerator as SamplingDatasetGenerator
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,20 +1,22 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import logging
|
||||
|
||||
import duckdb
|
||||
import pandas as pd
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.config.seed import IndexRange, PartitionBlock, SamplingStrategy
|
||||
from data_designer.engine.column_generators.generators.base import FromScratchColumnGenerator, GenerationStrategy
|
||||
from data_designer.engine.column_generators.utils.errors import SeedDatasetError
|
||||
from data_designer.engine.dataset_builders.multi_column_configs import SeedDatasetMultiColumnConfig
|
||||
from data_designer.engine.processing.utils import concat_datasets
|
||||
from data_designer.lazy_heavy_imports import duckdb, pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import duckdb
|
||||
import pandas as pd
|
||||
|
||||
MAX_ZERO_RECORD_RESPONSE_FACTOR = 2
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.config.column_configs import ValidationColumnConfig
|
||||
from data_designer.config.errors import InvalidConfigError
|
||||
|
|
@ -22,6 +21,10 @@ from data_designer.engine.validators import (
|
|||
SQLValidator,
|
||||
ValidationResult,
|
||||
)
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.config.base import ConfigBase
|
||||
from data_designer.config.column_configs import (
|
||||
EmbeddingColumnConfig,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.engine.errors import DataDesignerError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.config.column_types import DataDesignerColumnType
|
||||
from data_designer.config.utils.type_helpers import resolve_string_enum
|
||||
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModelRegistry
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, create_model
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from data_designer.config.column_configs import SeedDatasetColumnConfig
|
||||
|
|
|
|||
|
|
@ -1,15 +1,19 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC
|
||||
from pathlib import Path
|
||||
from typing import Generic, TypeVar, get_origin
|
||||
|
||||
import pandas as pd
|
||||
from typing import TYPE_CHECKING, Generic, TypeVar, get_origin
|
||||
|
||||
from data_designer.config.base import ConfigBase
|
||||
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
|
||||
from data_designer.engine.resources.resource_provider import ResourceProvider
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
DataT = TypeVar("DataT", dict, pd.DataFrame)
|
||||
TaskConfigT = TypeVar("ConfigT", bound=ConfigBase)
|
||||
|
|
|
|||
|
|
@ -1,19 +1,25 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pandas as pd
|
||||
from pydantic import BaseModel, field_validator, model_validator
|
||||
|
||||
from data_designer.config.utils.io_helpers import read_parquet_dataset
|
||||
from data_designer.config.utils.type_helpers import StrEnum, resolve_string_enum
|
||||
from data_designer.engine.dataset_builders.errors import ArtifactStorageError
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -12,8 +12,6 @@ import uuid
|
|||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from data_designer.config.column_types import ColumnConfigT
|
||||
from data_designer.config.dataset_builders import BuildStage
|
||||
from data_designer.config.processors import (
|
||||
|
|
@ -40,14 +38,16 @@ from data_designer.engine.processing.processors.base import Processor
|
|||
from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor
|
||||
from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry
|
||||
from data_designer.engine.resources.resource_provider import ResourceProvider
|
||||
from data_designer.lazy_heavy_imports import pd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModelRegistry
|
||||
from data_designer.engine.models.usage import ModelUsageStats
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_CLIENT_VERSION: str = importlib.metadata.version("data_designer")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.engine.errors import DataDesignerError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC
|
||||
from typing import TypeAlias
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.config.column_types import DataDesignerColumnType
|
||||
from data_designer.config.data_designer_config import DataDesignerConfig
|
||||
from data_designer.config.processors import ProcessorConfig
|
||||
|
|
|
|||
|
|
@ -1,13 +1,18 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import logging
|
||||
from __future__ import annotations
|
||||
|
||||
import networkx as nx
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.config.column_types import ColumnConfigT
|
||||
from data_designer.engine.column_generators.utils.generator_classification import column_type_used_in_execution_dag
|
||||
from data_designer.engine.dataset_builders.utils.errors import DAGCircularDependencyError
|
||||
from data_designer.lazy_heavy_imports import nx
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import networkx as nx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,16 +1,20 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Callable, Container, Iterator
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow.parquet as pq
|
||||
from typing import TYPE_CHECKING, Callable, Container, Iterator
|
||||
|
||||
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage, BatchStage
|
||||
from data_designer.engine.dataset_builders.utils.errors import DatasetBatchManagementError
|
||||
from data_designer.lazy_heavy_imports import pd, pq
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from data_designer.engine.errors import DataDesignerError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from data_designer.errors import DataDesignerError
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import cached_property
|
||||
|
||||
from pydantic import BaseModel, field_validator, model_validator
|
||||
|
|
|
|||
|
|
@ -6,25 +6,15 @@ from __future__ import annotations
|
|||
import logging
|
||||
from collections.abc import Callable
|
||||
from functools import wraps
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from litellm.exceptions import (
|
||||
APIConnectionError,
|
||||
APIError,
|
||||
AuthenticationError,
|
||||
BadRequestError,
|
||||
ContextWindowExceededError,
|
||||
InternalServerError,
|
||||
NotFoundError,
|
||||
PermissionDeniedError,
|
||||
RateLimitError,
|
||||
Timeout,
|
||||
UnprocessableEntityError,
|
||||
UnsupportedParamsError,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
from data_designer.engine.errors import DataDesignerError
|
||||
from data_designer.lazy_heavy_imports import litellm
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import litellm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -132,10 +122,10 @@ def handle_llm_exceptions(
|
|||
err_msg_parser = DownstreamLLMExceptionMessageParser(model_name, model_provider_name, purpose)
|
||||
match exception:
|
||||
# Common errors that can come from LiteLLM
|
||||
case APIError():
|
||||
case litellm.exceptions.APIError():
|
||||
raise err_msg_parser.parse_api_error(exception, authentication_error) from None
|
||||
|
||||
case APIConnectionError():
|
||||
case litellm.exceptions.APIConnectionError():
|
||||
raise ModelAPIConnectionError(
|
||||
FormattedLLMErrorMessage(
|
||||
cause=f"Connection to model {model_name!r} hosted on model provider {model_provider_name!r} failed while {purpose}.",
|
||||
|
|
@ -143,13 +133,13 @@ def handle_llm_exceptions(
|
|||
)
|
||||
) from None
|
||||
|
||||
case AuthenticationError():
|
||||
case litellm.exceptions.AuthenticationError():
|
||||
raise ModelAuthenticationError(authentication_error) from None
|
||||
|
||||
case ContextWindowExceededError():
|
||||
case litellm.exceptions.ContextWindowExceededError():
|
||||
raise err_msg_parser.parse_context_window_exceeded_error(exception) from None
|
||||
|
||||
case UnsupportedParamsError():
|
||||
case litellm.exceptions.UnsupportedParamsError():
|
||||
raise ModelUnsupportedParamsError(
|
||||
FormattedLLMErrorMessage(
|
||||
cause=f"One or more of the parameters you provided were found to be unsupported by model {model_name!r} while {purpose}.",
|
||||
|
|
@ -157,10 +147,10 @@ def handle_llm_exceptions(
|
|||
)
|
||||
) from None
|
||||
|
||||
case BadRequestError():
|
||||
case litellm.exceptions.BadRequestError():
|
||||
raise err_msg_parser.parse_bad_request_error(exception) from None
|
||||
|
||||
case InternalServerError():
|
||||
case litellm.exceptions.InternalServerError():
|
||||
raise ModelInternalServerError(
|
||||
FormattedLLMErrorMessage(
|
||||
cause=f"Model {model_name!r} is currently experiencing internal server issues while {purpose}.",
|
||||
|
|
@ -168,7 +158,7 @@ def handle_llm_exceptions(
|
|||
)
|
||||
) from None
|
||||
|
||||
case NotFoundError():
|
||||
case litellm.exceptions.NotFoundError():
|
||||
raise ModelNotFoundError(
|
||||
FormattedLLMErrorMessage(
|
||||
cause=f"The specified model {model_name!r} could not be found while {purpose}.",
|
||||
|
|
@ -176,7 +166,7 @@ def handle_llm_exceptions(
|
|||
)
|
||||
) from None
|
||||
|
||||
case PermissionDeniedError():
|
||||
case litellm.exceptions.PermissionDeniedError():
|
||||
raise ModelPermissionDeniedError(
|
||||
FormattedLLMErrorMessage(
|
||||
cause=f"Your API key was found to lack the necessary permissions to use model {model_name!r} while {purpose}.",
|
||||
|
|
@ -184,7 +174,7 @@ def handle_llm_exceptions(
|
|||
)
|
||||
) from None
|
||||
|
||||
case RateLimitError():
|
||||
case litellm.exceptions.RateLimitError():
|
||||
raise ModelRateLimitError(
|
||||
FormattedLLMErrorMessage(
|
||||
cause=f"You have exceeded the rate limit for model {model_name!r} while {purpose}.",
|
||||
|
|
@ -192,7 +182,7 @@ def handle_llm_exceptions(
|
|||
)
|
||||
) from None
|
||||
|
||||
case Timeout():
|
||||
case litellm.exceptions.Timeout():
|
||||
raise ModelTimeoutError(
|
||||
FormattedLLMErrorMessage(
|
||||
cause=f"The request to model {model_name!r} timed out while {purpose}.",
|
||||
|
|
@ -200,7 +190,7 @@ def handle_llm_exceptions(
|
|||
)
|
||||
) from None
|
||||
|
||||
case UnprocessableEntityError():
|
||||
case litellm.exceptions.UnprocessableEntityError():
|
||||
raise ModelUnprocessableEntityError(
|
||||
FormattedLLMErrorMessage(
|
||||
cause=f"The request to model {model_name!r} failed despite correct request format while {purpose}.",
|
||||
|
|
@ -264,7 +254,7 @@ class DownstreamLLMExceptionMessageParser:
|
|||
self.model_provider_name = model_provider_name
|
||||
self.purpose = purpose
|
||||
|
||||
def parse_bad_request_error(self, exception: BadRequestError) -> DataDesignerError:
|
||||
def parse_bad_request_error(self, exception: litellm.exceptions.BadRequestError) -> DataDesignerError:
|
||||
err_msg = FormattedLLMErrorMessage(
|
||||
cause=f"The request for model {self.model_name!r} was found to be malformed or missing required parameters while {self.purpose}.",
|
||||
solution="Check your request parameters and try again.",
|
||||
|
|
@ -276,7 +266,9 @@ class DownstreamLLMExceptionMessageParser:
|
|||
)
|
||||
return ModelBadRequestError(err_msg)
|
||||
|
||||
def parse_context_window_exceeded_error(self, exception: ContextWindowExceededError) -> DataDesignerError:
|
||||
def parse_context_window_exceeded_error(
|
||||
self, exception: litellm.exceptions.ContextWindowExceededError
|
||||
) -> DataDesignerError:
|
||||
cause = f"The input data for model '{self.model_name}' was found to exceed its supported context width while {self.purpose}."
|
||||
try:
|
||||
if "OpenAIException - This model's maximum context length is " in str(exception):
|
||||
|
|
@ -295,7 +287,7 @@ class DownstreamLLMExceptionMessageParser:
|
|||
)
|
||||
|
||||
def parse_api_error(
|
||||
self, exception: InternalServerError, auth_error_msg: FormattedLLMErrorMessage
|
||||
self, exception: litellm.exceptions.InternalServerError, auth_error_msg: FormattedLLMErrorMessage
|
||||
) -> DataDesignerError:
|
||||
if "Error code: 403" in str(exception):
|
||||
return ModelAuthenticationError(auth_error_msg)
|
||||
|
|
|
|||
|
|
@ -6,10 +6,7 @@ from __future__ import annotations
|
|||
import logging
|
||||
from collections.abc import Callable
|
||||
from copy import deepcopy
|
||||
from typing import Any
|
||||
|
||||
from litellm.types.router import DeploymentTypedDict, LiteLLM_Params
|
||||
from litellm.types.utils import EmbeddingResponse, ModelResponse
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from data_designer.config.models import GenerationType, ModelConfig, ModelProvider
|
||||
from data_designer.engine.model_provider import ModelProviderRegistry
|
||||
|
|
@ -23,6 +20,10 @@ from data_designer.engine.models.parsers.errors import ParserException
|
|||
from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats
|
||||
from data_designer.engine.models.utils import prompt_to_messages, str_to_message
|
||||
from data_designer.engine.secret_resolver import SecretResolver
|
||||
from data_designer.lazy_heavy_imports import litellm
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import litellm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -65,7 +66,9 @@ class ModelFacade:
|
|||
def usage_stats(self) -> ModelUsageStats:
|
||||
return self._usage_stats
|
||||
|
||||
def completion(self, messages: list[dict[str, str]], skip_usage_tracking: bool = False, **kwargs) -> ModelResponse:
|
||||
def completion(
|
||||
self, messages: list[dict[str, str]], skip_usage_tracking: bool = False, **kwargs
|
||||
) -> litellm.ModelResponse:
|
||||
logger.debug(
|
||||
f"Prompting model {self.model_name!r}...",
|
||||
extra={"model": self.model_name, "messages": messages},
|
||||
|
|
@ -236,14 +239,14 @@ class ModelFacade:
|
|||
) from exc
|
||||
return output_obj, reasoning_trace
|
||||
|
||||
def _get_litellm_deployment(self, model_config: ModelConfig) -> DeploymentTypedDict:
|
||||
def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.DeploymentTypedDict:
|
||||
provider = self._model_provider_registry.get_provider(model_config.provider)
|
||||
api_key = None
|
||||
if provider.api_key:
|
||||
api_key = self._secret_resolver.resolve(provider.api_key)
|
||||
api_key = api_key or "not-used-but-required"
|
||||
|
||||
litellm_params = LiteLLM_Params(
|
||||
litellm_params = litellm.LiteLLM_Params(
|
||||
model=f"{provider.provider_type}/{model_config.model}",
|
||||
api_base=provider.endpoint,
|
||||
api_key=api_key,
|
||||
|
|
@ -253,7 +256,7 @@ class ModelFacade:
|
|||
"litellm_params": litellm_params.model_dump(),
|
||||
}
|
||||
|
||||
def _track_usage(self, response: ModelResponse | None) -> None:
|
||||
def _track_usage(self, response: litellm.types.utils.ModelResponse | None) -> None:
|
||||
if response is None:
|
||||
self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1))
|
||||
return
|
||||
|
|
@ -270,7 +273,7 @@ class ModelFacade:
|
|||
request_usage=RequestUsageStats(successful_requests=1, failed_requests=0),
|
||||
)
|
||||
|
||||
def _track_usage_from_embedding(self, response: EmbeddingResponse | None) -> None:
|
||||
def _track_usage_from_embedding(self, response: litellm.types.utils.EmbeddingResponse | None) -> None:
|
||||
if response is None:
|
||||
self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1))
|
||||
return
|
||||
|
|
|
|||
42
src/data_designer/engine/models/factory.py
Normal file
42
src/data_designer/engine/models/factory.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from data_designer.config.models import ModelConfig
|
||||
from data_designer.engine.model_provider import ModelProviderRegistry
|
||||
from data_designer.engine.secret_resolver import SecretResolver
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from data_designer.engine.models.registry import ModelRegistry
|
||||
|
||||
|
||||
def create_model_registry(
    *,
    model_configs: list[ModelConfig] | None = None,
    secret_resolver: SecretResolver,
    model_provider_registry: ModelProviderRegistry,
) -> ModelRegistry:
    """Factory function for creating a ModelRegistry instance.

    Heavy dependencies (litellm, httpx) are deferred until this function is called.
    This is a factory function pattern - imports inside factories are idiomatic Python
    for lazy initialization.

    Args:
        model_configs: Model configurations forwarded unchanged to the registry
            (``None`` is forwarded as-is).
        secret_resolver: Resolver forwarded to the registry and to every facade
            built by the nested factory.
        model_provider_registry: Provider registry forwarded to the registry and
            to every facade built by the nested factory.

    Returns:
        A ModelRegistry configured with a factory callable that constructs
        ModelFacade instances.
    """
    # Deliberately imported here rather than at module level so that importing
    # this module does not pull in the heavy model stack.
    from data_designer.engine.models.facade import ModelFacade
    from data_designer.engine.models.litellm_overrides import apply_litellm_patches
    from data_designer.engine.models.registry import ModelRegistry

    # Apply the litellm patches before the registry (and any facade it creates) is built.
    apply_litellm_patches()

    def model_facade_factory(
        model_config: ModelConfig,
        secret_resolver: SecretResolver,
        model_provider_registry: ModelProviderRegistry,
    ) -> ModelFacade:
        """Build a ModelFacade from the given config, secret resolver and provider registry."""
        return ModelFacade(model_config, secret_resolver, model_provider_registry)

    return ModelRegistry(
        model_configs=model_configs,
        secret_resolver=secret_resolver,
        model_provider_registry=model_provider_registry,
        model_facade_factory=model_facade_factory,
    )
|
||||
|
|
@ -5,21 +5,26 @@ from __future__ import annotations
|
|||
|
||||
import random
|
||||
import threading
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import httpx
|
||||
import litellm
|
||||
from litellm import RetryPolicy
|
||||
from litellm.caching.in_memory_cache import InMemoryCache
|
||||
from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager
|
||||
from litellm.router import Router
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import override
|
||||
|
||||
from data_designer.lazy_heavy_imports import httpx, litellm
|
||||
from data_designer.logging import quiet_noisy_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import httpx
|
||||
import litellm
|
||||
|
||||
DEFAULT_MAX_CALLBACKS = 1000
|
||||
|
||||
|
||||
def _get_logging_callback_manager() -> (
    type[litellm.litellm_core_utils.logging_callback_manager.LoggingCallbackManager]
):
    """Lazy accessor for LoggingCallbackManager to avoid loading litellm at import time.

    Returns:
        The ``LoggingCallbackManager`` object looked up on
        ``litellm.litellm_core_utils.logging_callback_manager`` at call time
        (presumably the class itself, not an instance - callers set attributes
        such as ``MAX_CALLBACKS`` directly on it).
    """
    return litellm.litellm_core_utils.logging_callback_manager.LoggingCallbackManager
|
||||
|
||||
|
||||
class LiteLLMRouterDefaultKwargs(BaseModel):
|
||||
## Number of seconds to wait initially after a connection
|
||||
## failure.
|
||||
|
|
@ -35,15 +40,15 @@ class LiteLLMRouterDefaultKwargs(BaseModel):
|
|||
|
||||
## Sets the default retry policy, including the number
|
||||
## of retries to use in particular scenarios.
|
||||
retry_policy: RetryPolicy = Field(
|
||||
default_factory=lambda: RetryPolicy(
|
||||
retry_policy: litellm.RetryPolicy = Field(
|
||||
default_factory=lambda: litellm.RetryPolicy(
|
||||
RateLimitErrorRetries=3,
|
||||
TimeoutErrorRetries=3,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
class ThreadSafeCache(InMemoryCache):
|
||||
class ThreadSafeCache(litellm.caching.in_memory_cache.InMemoryCache):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
|
@ -78,7 +83,7 @@ class ThreadSafeCache(InMemoryCache):
|
|||
super().flush_cache()
|
||||
|
||||
|
||||
class CustomRouter(Router):
|
||||
class CustomRouter(litellm.router.Router):
|
||||
def __init__(
|
||||
self,
|
||||
*args,
|
||||
|
|
@ -155,7 +160,7 @@ def apply_litellm_patches():
|
|||
litellm.in_memory_llm_clients_cache = ThreadSafeCache()
|
||||
|
||||
# Workaround for the litellm issue described in https://github.com/BerriAI/litellm/issues/9792
|
||||
LoggingCallbackManager.MAX_CALLBACKS = DEFAULT_MAX_CALLBACKS
|
||||
_get_logging_callback_manager().MAX_CALLBACKS = DEFAULT_MAX_CALLBACKS
|
||||
|
||||
quiet_noisy_logger("httpx")
|
||||
quiet_noisy_logger("LiteLLM")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class ParserException(Exception):
|
||||
"""Identifies errors resulting from generic parser errors.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import reduce
|
||||
|
||||
import marko
|
||||
|
|
@ -80,13 +82,11 @@ class LLMResponseParser:
|
|||
code: str
|
||||
syntax: Optional[str] = None
|
||||
|
||||
|
||||
class CodeBlockParser:
|
||||
def __call__(self, element: _Element) -> CodeBlock:
|
||||
# Implementation details...
|
||||
return CodeBlock(code=element.text, syntax=element.get("class"))
|
||||
|
||||
|
||||
parser = LLMResponseParser(
|
||||
tag_parsers={
|
||||
"pre.code": CodeBlockParser(),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json_repair
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue