DataDesigner/packages/data-designer-engine/tests/engine/test_dataset_metadata.py
Johnny Greco ae0665fa16
refactor: slim package refactor into three subpackages (#240)
* remove old structure

* major shuffle

* streamline project configs

* update make commands

* updates to make commands

* remove essentials

* initialize logger in interface

* uv lock

* ignore notepad

* update workflows

* fix e2e project config

* generate colab notebooks

* resolve default model settings in interface

* fix build commands

* update perf import make command

* cleaning up some slop

* update recipes

* move conftest files to tests/

* update subpackage readmes

* streamline config_logging

* use exports

* update perf import usage pattern

* update for IDE behavior with ruff

* remove engine's fixtures file

* add note about lazy imports

* update dependencies

* update docs

* doc fixes

* uv lock

* updates to catch up with main

* clean up makefile

* remove package gitignores

* define deps only once

* isolate tests

* add test for protection rule

* create temp dirs for isolated tests

* catch up to main

* update headers

* re apply changes

* better result summaries for isolated tests

* move exports into top-level init

* fix client importlib version syntax

* catch up with main
2026-01-27 13:53:20 -05:00

56 lines
2.1 KiB
Python

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from unittest.mock import MagicMock
from data_designer.config.dataset_metadata import DatasetMetadata
from data_designer.engine.resources.resource_provider import ResourceProvider
from data_designer.engine.resources.seed_reader import SeedReader
def test_dataset_metadata_defaults() -> None:
    """A DatasetMetadata built with no arguments has an empty seed column list."""
    default_metadata = DatasetMetadata()
    seed_columns = default_metadata.seed_column_names
    assert seed_columns == []
def test_dataset_metadata_with_seed_columns() -> None:
    """Seed column names given to the constructor are exposed unchanged."""
    expected_columns = ["name", "age", "city"]
    # Hand the model its own list so the comparison below is against an
    # independent object, as in a literal-vs-literal check.
    populated = DatasetMetadata(seed_column_names=list(expected_columns))
    assert populated.seed_column_names == expected_columns
def test_get_dataset_metadata_with_seed_reader() -> None:
    """Metadata built from a provider with a seed reader lists the reader's columns."""
    # Arrange: a mocked reader reporting two columns, attached to a mocked provider.
    reader_mock = MagicMock(spec=SeedReader)
    reader_mock.get_column_names.return_value = ["col1", "col2"]
    provider_mock = MagicMock(spec=ResourceProvider)
    provider_mock.seed_reader = reader_mock

    # Act: invoke the method through the class, passing the mock as the instance.
    built_metadata = ResourceProvider.get_dataset_metadata(provider_mock)

    # Assert: the reader's columns are surfaced and the reader was queried once.
    assert built_metadata.seed_column_names == ["col1", "col2"]
    reader_mock.get_column_names.assert_called_once()
def test_get_dataset_metadata_without_seed_reader() -> None:
    """Metadata built from a provider whose seed reader is None has no seed columns."""
    provider_mock = MagicMock(spec=ResourceProvider)
    provider_mock.seed_reader = None

    # Invoke the method through the class, passing the mock as the instance.
    built_metadata = ResourceProvider.get_dataset_metadata(provider_mock)

    assert built_metadata.seed_column_names == []
def test_dataset_metadata_is_serializable() -> None:
    """DatasetMetadata dumps to JSON and can be rebuilt from that JSON."""
    source_metadata = DatasetMetadata(seed_column_names=["name", "age"])

    serialized = source_metadata.model_dump_json()
    assert '"seed_column_names":["name","age"]' in serialized

    # Validating the dumped JSON must yield the same seed column names.
    round_tripped = DatasetMetadata.model_validate_json(serialized)
    assert round_tripped.seed_column_names == ["name", "age"]