DataDesigner/tests/engine/models/test_usage.py
Nabin Mulepati 8370e4a00b
feat: support native embedding generation (#106)
* Add generation type to ModelConfig

* pass tests

* added generate_text_embeddings

* tests

* remove sensitive=True old artifact no longer needed

* Slight refactor

* slight refactor

* Added embedding generator

* chunk_separator -> chunk_pattern

* update tests

* rename for consistency

* Restructure InferenceParameters -> CompletionInferenceParameters, BaseInferenceParameters, EmbeddingInferenceParameters

* Remove purpose from consolidated kwargs

* WithModelConfiguration.inference_parameters should be typed with BaseInferenceParameters

* Type as WithModelGeneration

* Add image generation modality

* update return type for generate_kwargs

* make generation_type a field of ModelConfig as opposed to a prop resolved based on the type of InferenceParameters

* remove regex based chunking from embedding generator

* Remove image generation for now

* more tests and updates

* column_type_is_llm_generated -> column_type_is_model_generated

* change set to list: fix flaky tests

* CompletionInferenceParameters -> ChatCompletionInferenceParameters for consistency with generation_type

* Update docs

* fix deprecation warning originating from cli model settings

* update display of inference parameters in cli list

* save prog on inference parameter

* updates for the config builder

* update cli readme

* update cli for inference parameters

* update inference parameter names

* flip order of vars

* WithCompletion -> WithChatCompletion

* specify InferenceParamsT

* Update columns.md with EmbeddingColumnConfig info

* make generation_type a discriminator field in inference params. add configuration support for max_parallel_requests and timeout

* DRY out some stuff in field.py

* Update nomenclature. prompt tokens -> input tokens, completion tokens -> output tokens in column statistics for consistency

* Add nvidia-embedding and openai-embedding to default model configs

* Fix typo in docs

* Make generate collab notebooks

* fine-tune -> adjust
2025-12-15 11:03:33 -07:00

65 lines
2.8 KiB
Python

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats
def test_token_usage_stats():
    """Check TokenUsageStats counters in the default state and after one extend() call."""
    stats = TokenUsageStats()

    # Default-constructed stats: all counters are zero and no usage is reported.
    assert stats.input_tokens == 0
    assert stats.output_tokens == 0
    assert stats.total_tokens == 0
    assert stats.has_usage is False

    stats.extend(input_tokens=10, output_tokens=20)

    # After extending from the empty state the counters hold the supplied
    # values, and total_tokens is expected to be their sum (10 + 20).
    assert stats.input_tokens == 10
    assert stats.output_tokens == 20
    assert stats.total_tokens == 30
    assert stats.has_usage is True
def test_request_usage_stats():
    """Check RequestUsageStats counters in the default state and after one extend() call."""
    stats = RequestUsageStats()

    # Default-constructed stats: no successful or failed requests, no usage.
    assert stats.successful_requests == 0
    assert stats.failed_requests == 0
    assert stats.total_requests == 0
    assert stats.has_usage is False

    stats.extend(successful_requests=10, failed_requests=20)

    # After extending from the empty state the counters hold the supplied
    # values, and total_requests is expected to be their sum (10 + 20).
    assert stats.successful_requests == 10
    assert stats.failed_requests == 20
    assert stats.total_requests == 30
    assert stats.has_usage is True
def test_model_usage_stats():
    """Check ModelUsageStats in the default state and after extending it with token and request usage."""
    stats = ModelUsageStats()

    # Default-constructed stats: nested token and request counters are all zero.
    assert stats.token_usage.input_tokens == 0
    assert stats.token_usage.output_tokens == 0
    assert stats.request_usage.successful_requests == 0
    assert stats.request_usage.failed_requests == 0
    assert stats.has_usage is False

    # With nothing recorded, the summary reports zero totals and zero rates.
    expected_empty = {
        "token_usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
        "request_usage": {"successful_requests": 0, "failed_requests": 0, "total_requests": 0},
        "tokens_per_second": 0,
        "requests_per_minute": 0,
    }
    assert stats.get_usage_stats(total_time_elapsed=10) == expected_empty

    stats.extend(
        token_usage=TokenUsageStats(input_tokens=10, output_tokens=20),
        request_usage=RequestUsageStats(successful_requests=2, failed_requests=1),
    )

    # The nested counters now reflect the usage that was passed to extend().
    assert stats.token_usage.input_tokens == 10
    assert stats.token_usage.output_tokens == 20
    assert stats.request_usage.successful_requests == 2
    assert stats.request_usage.failed_requests == 1
    assert stats.has_usage is True

    # The expected rates match 30 tokens / 2 = 15 and 3 requests / 2 * 60 = 90
    # (assuming total_time_elapsed is in seconds -- confirm against the implementation).
    expected_after_extend = {
        "token_usage": {"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
        "request_usage": {"successful_requests": 2, "failed_requests": 1, "total_requests": 3},
        "tokens_per_second": 15,
        "requests_per_minute": 90,
    }
    assert stats.get_usage_stats(total_time_elapsed=2) == expected_after_extend