DataDesigner/tests/engine/analysis/column_profilers/test_base.py
Johnny Greco fdbc012989
feat: 🔌 Initial plugin system implementation (#23)
* separate column configs and types

* create plugin object

* create plugin manager

* fix config integration

* make base task registry raise on collision false by default

* update registry test after raise on collision default update

* make analysis work using general stats calculation

* default -> builtin

* use entry point approach instead

* rewire using plugin helpers

* add env var to disable plugins

* fix tests

* update plugin manager tests

* add tests for plugin helpers

* update license headers

* add emoji

* not using the pm in the builder code

* Update src/data_designer/plugins/manager.py

Co-authored-by: Nabin Mulepati <nmulepati@nvidia.com>

* Update src/data_designer/plugins/manager.py

Co-authored-by: Nabin Mulepati <nmulepati@nvidia.com>

* Update src/data_designer/plugins/manager.py

Co-authored-by: Nabin Mulepati <nmulepati@nvidia.com>

* merge plugin registry into the manager

* small pr feedback

* client side plugin manager

* builtin -> default; move adding plugins to registry

* update method names to better match what they do

* use register verb for consistency with other registries

* thread safety updates; make discover private

---------

Co-authored-by: Nabin Mulepati <nmulepati@nvidia.com>
2025-11-11 15:36:52 -05:00

67 lines
2.8 KiB
Python

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import pandas as pd
from pydantic import ValidationError
import pytest
from data_designer.config.column_configs import SamplerColumnConfig
from data_designer.config.column_types import DataDesignerColumnType
from data_designer.config.sampler_params import SamplerType
from data_designer.engine.analysis.column_profilers.base import (
ColumnConfigWithDataFrame,
ColumnProfilerMetadata,
)
def test_column_config_with_dataframe_valid_column_config_with_dataframe():
    """Wrap a sampler config with a frame that contains its column and read both back."""
    # Arrange: the frame has a column named exactly like the config.
    sampler_config = SamplerColumnConfig(name="test_column", sampler_type=SamplerType.CATEGORY, params={})
    frame = pd.DataFrame({"test_column": [1, 2, 3]})

    # Act
    wrapped = ColumnConfigWithDataFrame(column_config=sampler_config, df=frame)

    # Assert: the config and the column data are both reachable through the wrapper.
    assert wrapped.column_config.name == "test_column"
    assert "test_column" in wrapped.df.columns
    assert wrapped.df["test_column"].tolist() == [1, 2, 3]
def test_column_config_with_dataframe_column_not_found_validation_error():
    """Expect a ValidationError when the frame has no column named after the config."""
    sampler_config = SamplerColumnConfig(name="test_column", sampler_type=SamplerType.CATEGORY, params={})
    # Deliberately use a different column name than the one the config refers to.
    frame_without_column = pd.DataFrame({"other_column": [1, 2, 3]})
    expected_message = "Column 'test_column' not found in DataFrame"

    with pytest.raises(ValidationError, match=expected_message):
        ColumnConfigWithDataFrame(column_config=sampler_config, df=frame_without_column)
def test_column_config_with_dataframe_pyarrow_backend_conversion():
    """Check that every dtype of the wrapped frame is a ``pd.ArrowDtype``."""
    sampler_config = SamplerColumnConfig(name="test_column", sampler_type=SamplerType.CATEGORY, params={})
    frame = pd.DataFrame({"test_column": [1, 2, 3]})

    wrapped = ColumnConfigWithDataFrame(column_config=sampler_config, df=frame)

    # No column of the stored frame may have a non-Arrow dtype.
    assert not any(not isinstance(dtype, pd.ArrowDtype) for dtype in wrapped.df.dtypes)
def test_column_config_with_dataframe_as_tuple_method():
    """Check that ``as_tuple()`` unpacks into the column config and a frame with the same values."""
    sampler_config = SamplerColumnConfig(name="test_column", sampler_type=SamplerType.CATEGORY, params={})
    frame = pd.DataFrame({"test_column": [1, 2, 3]})
    wrapped = ColumnConfigWithDataFrame(column_config=sampler_config, df=frame)

    unpacked = wrapped.as_tuple()
    returned_config, returned_frame = unpacked

    assert returned_config == sampler_config
    # Compare values only, via plain Python lists.
    assert returned_frame["test_column"].tolist() == frame["test_column"].tolist()
def test_column_profiler_metadata_creation():
    """Build a ColumnProfilerMetadata and confirm its fields echo the constructor arguments."""
    expected_types = [DataDesignerColumnType.SAMPLER, DataDesignerColumnType.LLM_TEXT]

    # Hand the constructor its own copy so the comparison below uses an independent list.
    profiler_metadata = ColumnProfilerMetadata(
        name="test_profiler",
        description="Test profiler",
        applicable_column_types=list(expected_types),
        required_resources=None,
    )

    assert profiler_metadata.name == "test_profiler"
    assert profiler_metadata.description == "Test profiler"
    assert profiler_metadata.applicable_column_types == expected_types