DataDesigner/tests_e2e/tests/test_e2e.py
Johnny Greco ae0665fa16
refactor: slim package refactor into three subpackages (#240)
* remove old structure

* major shuffle

* streamline project configs

* update make commands

* updates to make commands

* remove essentials

* initialize logger in interface

* uv lock

* ignore notepad

* update workflows

* fix e2e project config

* generate colab notebooks

* resolve default model settings in interface

* fix build commands

* update perf import make command

* cleaning up some slop

* update recipes

* move conftest files to tests/

* update subpackage readmes

* streamline config_logging

* use exports

* update perf import usage pattern

* update for IDE behavior with ruff

* remove engine's fixtures file

* add note about lazy imports

* update dependencies

* update docs

* doc fixes

* uv lock

* updates to catch up with main

* clean up makefile

* remove package gitignores

* define deps only once

* isolate tests

* add test for protection rule

* create temp dirs for isolated tests

* catch up to main

* update headers

* re apply changes

* better result summaries for isolated tests

* move exports into top-level init

* fix client importlib version syntax

* catch up with main
2026-01-27 13:53:20 -05:00

67 lines
2.2 KiB
Python

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from pathlib import Path
import data_designer.config as dd
from data_designer.interface import DataDesigner
from data_designer_e2e_tests.plugins.column_generator.config import DemoColumnGeneratorConfig
from data_designer_e2e_tests.plugins.seed_reader.config import DemoSeedSource
def test_column_generator_plugin() -> None:
    """Preview a config that uses the demo column-generator plugin config.

    Builds a config with a placeholder sampler column plus a
    ``DemoColumnGeneratorConfig`` column named ``"upper"`` whose ``text`` is
    ``"hello world"``, runs a preview, and expects every value in the
    ``"upper"`` column to be ``"HELLO WORLD"``.
    """
    designer = DataDesigner()
    builder = dd.DataDesignerConfigBuilder()

    # This sampler column is necessary as a temporary workaround to https://github.com/NVIDIA-NeMo/DataDesigner/issues/4
    placeholder_column = dd.SamplerColumnConfig(
        name="irrelevant",
        sampler_type=dd.SamplerType.CATEGORY,
        params=dd.CategorySamplerParams(values=["irrelevant"]),
    )
    builder.add_column(placeholder_column)

    # Column under test: supplied by the e2e demo plugin package.
    plugin_column = DemoColumnGeneratorConfig(name="upper", text="hello world")
    builder.add_column(plugin_column)

    result = designer.preview(builder)

    # Collapse to a set so the check is independent of how many rows the preview returns.
    assert set(result.dataset["upper"].values) == {"HELLO WORLD"}
def test_seed_reader_plugin() -> None:
    """Preview a config seeded through the demo seed-reader plugin source.

    Points a ``DemoSeedSource`` at ``test_seed.csv`` in this test file's
    directory, adds a placeholder sampler column and an expression column
    ``"full_name"`` rendered from ``first_name`` and ``last_name``, runs a
    preview, and checks the set of distinct ``"full_name"`` values.
    """
    tests_dir = Path(__file__).parent
    designer = DataDesigner()
    builder = dd.DataDesignerConfigBuilder()

    # Seed data is read from the CSV that sits next to this test module.
    seed_source = DemoSeedSource(directory=str(tests_dir), filename="test_seed.csv")
    builder.with_seed_dataset(seed_source)

    # This sampler column is necessary as a temporary workaround to https://github.com/NVIDIA-NeMo/DataDesigner/issues/4
    placeholder_column = dd.SamplerColumnConfig(
        name="irrelevant",
        sampler_type=dd.SamplerType.CATEGORY,
        params=dd.CategorySamplerParams(values=["irrelevant"]),
    )
    builder.add_column(placeholder_column)

    # Template referencing the first_name / last_name fields; the literal " + " is part of the output.
    name_column = dd.ExpressionColumnConfig(
        name="full_name",
        expr="{{ first_name }} + {{ last_name }}",
    )
    builder.add_column(name_column)

    result = designer.preview(builder)

    expected_names = {"John + Coltrane", "Miles + Davis", "Bill + Evans"}
    assert set(result.dataset["full_name"].values) == expected_names