DataDesigner/packages/data-designer-engine/tests/engine/test_capacity.py
Eric W. Tramel c0a4dcbb85
Some checks are pending
CI / End to end test (Python 3.13 on macos-latest) (push) Blocked by required conditions
CI / End to end test (Python 3.10 on ubuntu-latest) (push) Blocked by required conditions
CI / End to end test (Python 3.11 on ubuntu-latest) (push) Blocked by required conditions
CI / End to end test (Python 3.12 on ubuntu-latest) (push) Blocked by required conditions
CI / End to end test (Python 3.13 on ubuntu-latest) (push) Blocked by required conditions
CI / Lint and Format Check (push) Blocked by required conditions
CI / Check License Headers (push) Blocked by required conditions
CI / End to end test (Python 3.10 on macos-latest) (push) Blocked by required conditions
CI / Validate dispatched SHA (push) Waiting to run
CI / Test Config (Python 3.10 on macos-latest) (push) Blocked by required conditions
CI / Test Config (Python 3.11 on macos-latest) (push) Blocked by required conditions
CI / Test Config (Python 3.12 on macos-latest) (push) Blocked by required conditions
CI / Test Config (Python 3.13 on macos-latest) (push) Blocked by required conditions
CI / Test Config (Python 3.10 on ubuntu-latest) (push) Blocked by required conditions
CI / End to end test (Python 3.11 on macos-latest) (push) Blocked by required conditions
CI / Test Config (Python 3.11 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Config (Python 3.12 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Config (Python 3.13 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Engine (Python 3.10 on macos-latest) (push) Blocked by required conditions
CI / End to end test (Python 3.12 on macos-latest) (push) Blocked by required conditions
CI / Test Engine (Python 3.11 on macos-latest) (push) Blocked by required conditions
CI / Test Engine (Python 3.12 on macos-latest) (push) Blocked by required conditions
CI / Test Engine (Python 3.13 on macos-latest) (push) Blocked by required conditions
CI / Test Engine (Python 3.10 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Engine (Python 3.11 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Engine (Python 3.12 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Engine (Python 3.13 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Interface (Python 3.10 on macos-latest) (push) Blocked by required conditions
CI / Test Interface (Python 3.11 on macos-latest) (push) Blocked by required conditions
CI / Test Interface (Python 3.12 on macos-latest) (push) Blocked by required conditions
CI / Test Interface (Python 3.13 on macos-latest) (push) Blocked by required conditions
CI / Test Interface (Python 3.10 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Interface (Python 3.11 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Interface (Python 3.12 on ubuntu-latest) (push) Blocked by required conditions
CI / Test Interface (Python 3.13 on ubuntu-latest) (push) Blocked by required conditions
CI / Coverage Check (Python 3.11) (push) Blocked by required conditions
CI / Test (Python 3.10 on macos-latest) (push) Blocked by required conditions
CI / Test (Python 3.11 on macos-latest) (push) Blocked by required conditions
CI / Test (Python 3.12 on macos-latest) (push) Blocked by required conditions
CI / Test (Python 3.10 on ubuntu-latest) (push) Blocked by required conditions
CI / Test (Python 3.13 on macos-latest) (push) Blocked by required conditions
CI / Test (Python 3.11 on ubuntu-latest) (push) Blocked by required conditions
CI / Test (Python 3.12 on ubuntu-latest) (push) Blocked by required conditions
CI / Test (Python 3.13 on ubuntu-latest) (push) Blocked by required conditions
feat: implement async scheduling admission control (#661)
2026-05-20 20:58:05 -04:00

74 lines
3.5 KiB
Python

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from data_designer.engine.capacity import (
AsyncCapacityConfigured,
AsyncCapacityObservedMaxima,
AsyncCapacityPlan,
AsyncCapacityRuntimeSnapshot,
CapacityValue,
RequestAdmissionConfigSnapshot,
RowGroupAdmission,
)
from data_designer.engine.models.request_admission.config import RequestAdmissionConfig
from data_designer.engine.models.request_admission.resources import RequestDomain, RequestResourceKey
from data_designer.engine.models.resources import ProviderModelKey, ProviderModelStaticCap
def test_request_admission_config_snapshot_records_resources() -> None:
    """Check that a snapshot built from a config exposes its resource and limits.

    A config is created for a single chat resource key with an initial limit,
    a max-limit clamp, and a startup ramp duration; the snapshot produced by
    ``RequestAdmissionConfigSnapshot.from_config`` is expected to report the
    same values.
    """
    chat_key = RequestResourceKey("nvidia", "nemotron", RequestDomain.CHAT)
    admission_config = RequestAdmissionConfig(
        initial_limits={chat_key: 2},
        max_limit_clamps={chat_key: 4},
        startup_ramp_seconds=30.0,
    )

    captured = RequestAdmissionConfigSnapshot.from_config(admission_config)

    # Exactly one resource was configured, so a one-element tuple is expected.
    assert captured.resources == (chat_key,)
    assert captured.initial_limits[chat_key] == 2
    assert captured.max_limit_clamps[chat_key] == 4
    assert captured.startup_ramp_seconds == 30.0
def test_async_capacity_plan_records_configured_runtime_and_maxima() -> None:
    """Check that an ``AsyncCapacityPlan`` keeps its three sections readable.

    The plan is assembled from a configured section, a runtime snapshot, and
    observed maxima, all keyed by one chat resource and one provider/model
    pair. One value from each section is then read back through the plan.
    """
    chat_key = RequestResourceKey("nvidia", "nemotron", RequestDomain.CHAT)
    model_key = ProviderModelKey("nvidia", "nemotron")
    model_cap = ProviderModelStaticCap(cap=4, aliases=("default",), raw_caps={"default": 4})

    # Configured section: every entry is a CapacityValue pairing a value with
    # the label of the source it came from.
    configured_section = AsyncCapacityConfigured(
        buffer_size=CapacityValue(value=16, source="run_config"),
        row_group_admission=RowGroupAdmission(
            row_group_concurrency=CapacityValue(value=2, source="dataset_builder"),
            observed_in_flight=1,
        ),
        submission_capacity=CapacityValue(value=8, source="engine_internal_config"),
        task_resource_limits=CapacityValue(
            value={"submission": 8, "llm_wait": 4},
            source="engine_internal_config",
        ),
        request_resources=CapacityValue(value=(chat_key,), source="runtime_snapshot"),
        provider_model_static_caps=CapacityValue(value={model_key: model_cap}, source="model_metadata"),
        request_domain_initial_limits=CapacityValue(value={chat_key: 2}, source="engine_internal_config"),
        request_admission_config=CapacityValue(
            value=RequestAdmissionConfigSnapshot.from_config(
                RequestAdmissionConfig(initial_limits={chat_key: 2}),
            ),
            source="engine_internal_config",
        ),
        transport_pool_limits=CapacityValue(value={model_key: 8}, source="adapter_config"),
    )

    # Runtime section: per-resource limits plus per-model in-flight count.
    runtime_section = AsyncCapacityRuntimeSnapshot(
        request_domain_current_limits={chat_key: 2},
        request_domain_effective_max={chat_key: 4},
        request_domain_blocked_until={chat_key: None},
        provider_model_aggregate_in_flight={model_key: 0},
    )

    # Observed-maxima section.
    maxima_section = AsyncCapacityObservedMaxima(
        row_groups_in_flight=1,
        request_in_flight_by_resource={chat_key: 2},
        provider_model_aggregate_in_flight={model_key: 2},
    )

    plan = AsyncCapacityPlan(
        configured=configured_section,
        runtime_snapshot=runtime_section,
        observed_maxima=maxima_section,
    )

    # merge_rule was not passed to ProviderModelStaticCap above, so this
    # checks the value the class supplies on its own.
    static_caps = plan.configured.provider_model_static_caps.value
    assert static_caps[model_key].merge_rule == "min_same_endpoint"
    assert plan.runtime_snapshot.request_domain_current_limits[chat_key] == 2
    assert plan.observed_maxima.provider_model_aggregate_in_flight[model_key] == 2