ISSUE #3031 - Dynamic Sampling Config (#27184)

* feat: move flat sampling to sampling config + dynamic sampling option

* feat: move flat sampling on the backend to sample profile config object

* feat: fix circular import

* feat: align UI with new profiler config

* feat: fix json schema

* feat: align python imports with new schema path

* feat: update migration to look at extension

* feat: remove enable

* feat: remove enable

* feat: added titles to sample config

* feat: generated ts classes

* feat: addressed comments

* feat: change sample config instantiation to match new structure

* feat: removed backward compatible fields

* feat: ran java linting

* UI fixes, tests and locale changes

* fix failing test

* fix ui check style

* fix failing profiler test

* feat: fix ci failures

* feat: generated ts classes

* feat: fix ci failure

* fix: failing ci

* address comments

* fix failing test

* fix: ci failure

---------

Co-authored-by: Harshit Shah <dinkushah169@gmail.com>
This commit is contained in:
Teddy 2026-04-17 10:46:06 -07:00 committed by GitHub
parent 5ffff63c93
commit 47c88d49ce
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
95 changed files with 2891 additions and 630 deletions

View file

@ -130,6 +130,86 @@ FROM user_entity ue, role_entity re
WHERE ue.name = 'mcpapplicationbot'
AND re.name = 'ApplicationBotImpersonationRole';
-- entity_extension: build profileSampleConfig from the existing flat fields (skip if already migrated).
-- The new object is always written as STATIC; a missing profileSampleType falls back to "PERCENTAGE".
-- Only rows whose flat profileSample is present and not JSON null are touched.
UPDATE entity_extension
SET json = JSON_SET(
        json,
        '$.profileSampleConfig',
        JSON_OBJECT(
            'sampleConfigType', 'STATIC',
            'config', JSON_OBJECT(
                'profileSample', JSON_EXTRACT(json, '$.profileSample'),
                'profileSampleType', COALESCE(
                    JSON_EXTRACT(json, '$.profileSampleType'),
                    CAST('"PERCENTAGE"' AS JSON)
                ),
                'samplingMethodType', JSON_EXTRACT(json, '$.samplingMethodType')
            )
        )
    )
WHERE extension IN (
        'table.tableProfilerConfig',
        'database.databaseProfilerConfig',
        'databaseSchema.databaseSchemaProfilerConfig'
    )
    AND JSON_EXTRACT(json, '$.profileSample') IS NOT NULL
    AND JSON_TYPE(JSON_EXTRACT(json, '$.profileSample')) <> 'NULL'
    AND JSON_CONTAINS_PATH(json, 'one', '$.profileSampleConfig') = 0;
-- entity_extension: remove the old flat sampling fields.
-- JSON_REMOVE evaluates its paths left to right, so one call replaces the nested calls;
-- JSON_CONTAINS_PATH in 'one' mode matches when at least one of the listed keys is present.
UPDATE entity_extension
SET json = JSON_REMOVE(
        json,
        '$.samplingMethodType',
        '$.profileSampleType',
        '$.profileSample'
    )
WHERE extension IN (
        'table.tableProfilerConfig',
        'database.databaseProfilerConfig',
        'databaseSchema.databaseSchemaProfilerConfig'
    )
    AND JSON_CONTAINS_PATH(
        json,
        'one',
        '$.profileSample',
        '$.profileSampleType',
        '$.samplingMethodType'
    );
-- ingestion_pipeline_entity (profiler pipelines): build sourceConfig.config.profileSampleConfig
-- from the flat sampling fields (skip if already migrated).
-- The new object is always written as STATIC; a missing profileSampleType falls back to "PERCENTAGE".
UPDATE ingestion_pipeline_entity
SET json = JSON_SET(
        json,
        '$.sourceConfig.config.profileSampleConfig',
        JSON_OBJECT(
            'sampleConfigType', 'STATIC',
            'config', JSON_OBJECT(
                'profileSample', JSON_EXTRACT(json, '$.sourceConfig.config.profileSample'),
                'profileSampleType', COALESCE(
                    JSON_EXTRACT(json, '$.sourceConfig.config.profileSampleType'),
                    CAST('"PERCENTAGE"' AS JSON)
                ),
                'samplingMethodType', JSON_EXTRACT(json, '$.sourceConfig.config.samplingMethodType')
            )
        )
    )
WHERE pipelineType = 'profiler'
    AND JSON_EXTRACT(json, '$.sourceConfig.config.profileSample') IS NOT NULL
    AND JSON_TYPE(JSON_EXTRACT(json, '$.sourceConfig.config.profileSample')) <> 'NULL'
    AND JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.profileSampleConfig') = 0;
-- ingestion_pipeline_entity (profiler pipelines): remove the old flat sampling fields.
-- JSON_REMOVE evaluates its paths left to right, so one call replaces the nested calls;
-- JSON_CONTAINS_PATH in 'one' mode matches when at least one of the listed keys is present.
UPDATE ingestion_pipeline_entity
SET json = JSON_REMOVE(
        json,
        '$.sourceConfig.config.samplingMethodType',
        '$.sourceConfig.config.profileSampleType',
        '$.sourceConfig.config.profileSample'
    )
WHERE pipelineType = 'profiler'
    AND JSON_CONTAINS_PATH(
        json,
        'one',
        '$.sourceConfig.config.profileSample',
        '$.sourceConfig.config.profileSampleType',
        '$.sourceConfig.config.samplingMethodType'
    );
-- RDF distributed indexing state tables
CREATE TABLE IF NOT EXISTS rdf_index_job (
id VARCHAR(36) NOT NULL,

View file

@ -151,6 +151,83 @@ WHERE ue.name = 'mcpapplicationbot'
AND re.name = 'ApplicationBotImpersonationRole'
ON CONFLICT DO NOTHING;
-- Migrate profiler sampling config: move flat profileSample/profileSampleType/samplingMethodType
-- into the new profileSampleConfig structure. Default to STATIC since DYNAMIC is new.
-- Profiler configs are stored in entity_extension table, not in entity json columns.
-- Extension keys: table.tableProfilerConfig, database.databaseProfilerConfig, databaseSchema.databaseSchemaProfilerConfig
-- The json column in entity_extension contains the config object directly (flat root-level fields).
-- entity_extension: build profileSampleConfig from the existing flat fields (skip if already migrated).
-- The new object is always written as STATIC; a missing profileSampleType falls back to "PERCENTAGE".
-- ->> yields SQL NULL for both a missing key and a JSON null, so such rows are left untouched.
UPDATE entity_extension
SET json = jsonb_set(
        json::jsonb,
        '{profileSampleConfig}',
        jsonb_build_object(
            'sampleConfigType', 'STATIC',
            'config', jsonb_build_object(
                'profileSample', json::jsonb -> 'profileSample',
                'profileSampleType', COALESCE(
                    json::jsonb -> 'profileSampleType',
                    '"PERCENTAGE"'::jsonb
                ),
                'samplingMethodType', json::jsonb -> 'samplingMethodType'
            )
        )
    )::json
WHERE extension IN (
        'table.tableProfilerConfig',
        'database.databaseProfilerConfig',
        'databaseSchema.databaseSchemaProfilerConfig'
    )
    AND json::jsonb ->> 'profileSample' IS NOT NULL
    AND json::jsonb -> 'profileSampleConfig' IS NULL;
-- entity_extension: remove the old flat sampling fields.
-- Key presence is tested with #> (jsonb result) rather than #>> (text result): #>> returns
-- SQL NULL for a key whose value is JSON null, which would leave such keys behind, whereas
-- #> returns 'null'::jsonb for them. This matches the MySQL migration, which checks key
-- presence with JSON_CONTAINS_PATH.
UPDATE entity_extension
SET json = (
        json::jsonb
        #- '{profileSample}'
        #- '{profileSampleType}'
        #- '{samplingMethodType}'
    )::json
WHERE extension IN (
        'table.tableProfilerConfig',
        'database.databaseProfilerConfig',
        'databaseSchema.databaseSchemaProfilerConfig'
    )
    AND (
        json::jsonb #> '{profileSample}' IS NOT NULL
        OR json::jsonb #> '{profileSampleType}' IS NOT NULL
        OR json::jsonb #> '{samplingMethodType}' IS NOT NULL
    );
-- ingestion_pipeline_entity (profiler pipelines): build sourceConfig.config.profileSampleConfig
-- from the flat sampling fields (skip if already migrated).
-- The new object is always written as STATIC; a missing profileSampleType falls back to "PERCENTAGE".
UPDATE ingestion_pipeline_entity
SET json = jsonb_set(
        json::jsonb,
        '{sourceConfig,config,profileSampleConfig}',
        jsonb_build_object(
            'sampleConfigType', 'STATIC',
            'config', jsonb_build_object(
                'profileSample', json::jsonb #> '{sourceConfig,config,profileSample}',
                'profileSampleType', COALESCE(
                    json::jsonb #> '{sourceConfig,config,profileSampleType}',
                    '"PERCENTAGE"'::jsonb
                ),
                'samplingMethodType', json::jsonb #> '{sourceConfig,config,samplingMethodType}'
            )
        )
    )::json
WHERE json ->> 'pipelineType' = 'profiler'
    AND json::jsonb #>> '{sourceConfig,config,profileSample}' IS NOT NULL
    AND json::jsonb #> '{sourceConfig,config,profileSampleConfig}' IS NULL;
-- ingestion_pipeline_entity (profiler pipelines): remove the old flat sampling fields.
-- Key presence is tested with #> (jsonb result) rather than #>> (text result): #>> returns
-- SQL NULL for a key whose value is JSON null, which would leave such keys behind, whereas
-- #> returns 'null'::jsonb for them. This matches the MySQL migration, which checks key
-- presence with JSON_CONTAINS_PATH.
UPDATE ingestion_pipeline_entity
SET json = (
        json::jsonb
        #- '{sourceConfig,config,profileSample}'
        #- '{sourceConfig,config,profileSampleType}'
        #- '{sourceConfig,config,samplingMethodType}'
    )::json
WHERE json #>> '{pipelineType}' = 'profiler'
    AND (
        json::jsonb #> '{sourceConfig,config,profileSample}' IS NOT NULL
        OR json::jsonb #> '{sourceConfig,config,profileSampleType}' IS NOT NULL
        OR json::jsonb #> '{sourceConfig,config,samplingMethodType}' IS NOT NULL
    );
-- RDF distributed indexing state tables
CREATE TABLE IF NOT EXISTS rdf_index_job (
id VARCHAR(36) NOT NULL,

View file

@ -32,7 +32,12 @@ from metadata.generated.schema.metadataIngestion.workflow import (
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.sampler_interface import SamplerInterface
from metadata.utils.bigquery_utils import copy_service_config
from metadata.utils.profiler_utils import get_context_entities
@ -126,9 +131,16 @@ class BaseTestSuiteRunner:
schema_entity=schema_entity,
database_entity=database_entity,
default_sample_config=SampleConfig(
profileSample=self.source_config.profileSample,
profileSampleType=self.source_config.profileSampleType,
samplingMethodType=self.source_config.samplingMethodType,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=self.source_config.profileSample,
profileSampleType=self.source_config.profileSampleType,
samplingMethodType=self.source_config.samplingMethodType,
),
)
if self.source_config.profileSample
else None,
),
)

View file

@ -38,7 +38,7 @@ from metadata.data_quality.validations.models import (
TableDiffRuntimeParameters,
TableParameter,
)
from metadata.generated.schema.entity.data.table import Column, ProfileSampleType
from metadata.generated.schema.entity.data.table import Column
from metadata.generated.schema.entity.services.connections.database.sapHanaConnection import (
SapHanaScheme,
)
@ -50,6 +50,7 @@ from metadata.generated.schema.tests.basic import (
TestCaseStatus,
TestResultValue,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.profiler.metrics.registry import Metrics
from metadata.profiler.orm.converter.base import build_orm_col
from metadata.profiler.orm.functions.md5 import MD5
@ -465,16 +466,19 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
on Table 1 and the hash will ensure that the same row is selected on Table 2. We want to avoid selecting rows
with different ids because the comparison will not be sensible.
"""
if (
# no sample configuration
self.runtime_params.table_profile_config is None
or self.runtime_params.table_profile_config.profileSample is None
# sample is 100% or in other words no sample is required
or (
self.runtime_params.table_profile_config.profileSampleType
== ProfileSampleType.PERCENTAGE
and self.runtime_params.table_profile_config.profileSample == 100
)
config = self.runtime_params.table_profile_config
if config is None:
return None, None
profile_sample_config = config.profileSampleConfig if config else None
sample_config = profile_sample_config.root if profile_sample_config else None
static = sample_config.config if sample_config else None
profile_sample = getattr(static, "profileSample", None) if static else None
profile_sample_type = (
getattr(static, "profileSampleType", None) if static else None
)
if profile_sample is None or (
profile_sample_type == ProfileSampleType.PERCENTAGE
and profile_sample == 100
):
return None, None
if DatabaseServiceType.Mssql in [
@ -520,26 +524,19 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin):
def calculate_nounce(self, max_nounce=2**32 - 1) -> int:
"""Calculate the nounce based on the profile sample configuration. The nounce is
the sample fraction projected to a number on a scale of 0 to max_nounce"""
if (
self.runtime_params.table_profile_config.profileSampleType
== ProfileSampleType.PERCENTAGE
):
return int(
max_nounce
* self.runtime_params.table_profile_config.profileSample
/ 100
)
if (
self.runtime_params.table_profile_config.profileSampleType
== ProfileSampleType.ROWS
):
config = self.runtime_params.table_profile_config
profile_sample_config = config.profileSampleConfig if config else None
sample_config = profile_sample_config.root if profile_sample_config else None
static = sample_config.config if sample_config else None
profile_sample = getattr(static, "profileSample", 100)
profile_sample_type = getattr(static, "profileSampleType", None)
if profile_sample_type == ProfileSampleType.PERCENTAGE:
return int(max_nounce * profile_sample / 100)
if profile_sample_type == ProfileSampleType.ROWS:
row_count = self.get_total_row_count()
if row_count is None:
raise ValueError("Row count is required for ROWS profile sample type")
return int(
max_nounce
* (self.runtime_params.table_profile_config.profileSample / row_count)
)
return int(max_nounce * (profile_sample / row_count))
raise ValueError("Invalid profile sample type")
def get_row_diff_test_case_result(

View file

@ -147,22 +147,23 @@ class PandasInterfaceMixin:
def yield_sampled_dfs():
dfs = raw_dataset
if sample_config.profileSampleType == ProfileSampleType.PERCENTAGE:
static = sample_config.get_static_config()
if static and static.profileSampleType == ProfileSampleType.PERCENTAGE:
# Sampling based on percentage of rows will be applied to each dataframe chunk
# to ensure consistent efficiency across large dataset. Other option would be to
# either concatenate all dataframes (may cause OOM) or perform 2 passes (one to count rows,
# another to sample) which would be less efficient.
try:
percentage = sample_config.profileSample or 100
percentage = static.profileSample or 100
for df in dfs():
yield df.sample(frac=percentage / 100)
except Exception as exc:
logger.error(
f"Error sampling dataframes based on percentage {sample_config.profileSample}: {exc}"
f"Error sampling dataframes based on percentage {static.profileSample}: {exc}"
)
elif sample_config.profileSampleType == ProfileSampleType.ROWS:
elif static and static.profileSampleType == ProfileSampleType.ROWS:
try:
rows = sample_config.profileSample or 0
rows = static.profileSample or 0
streamed_rows = 0
for df in dfs():
n = len(df)
@ -174,7 +175,7 @@ class PandasInterfaceMixin:
break
except Exception as exc:
logger.error(
f"Error sampling dataframes based on rows {sample_config.profileSample}: {exc}"
f"Error sampling dataframes based on rows {static.profileSample}: {exc}"
)
else:
logger.warning(

View file

@ -548,13 +548,15 @@ class Profiler(Generic[TMetric]):
createDateTime=raw_create_date,
sizeInByte=self._table_results.get("sizeInBytes"),
profileSample=(
self.profiler_interface.sampler.sample_config.profileSample
self.profiler_interface.sampler.sample_config.get_static_config().profileSample
if self.profiler_interface.sampler.sample_config
and self.profiler_interface.sampler.sample_config.get_static_config()
else None
),
profileSampleType=(
self.profiler_interface.sampler.sample_config.profileSampleType
self.profiler_interface.sampler.sample_config.get_static_config().profileSampleType
if self.profiler_interface.sampler.sample_config
and self.profiler_interface.sampler.sample_config.get_static_config()
else None
),
customMetrics=self._table_results.get("customMetrics"),

View file

@ -44,7 +44,7 @@ from metadata.sampler.config import (
get_exclude_columns,
get_include_columns,
)
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import ProfileSampleConfig, SampleConfig
from metadata.sampler.sampler_interface import SamplerInterface
from metadata.utils.dependency_injector.dependency_injector import (
DependencyNotFoundError,
@ -141,6 +141,19 @@ class ProfilerSource(ProfilerSourceInterface):
return config_copy
def _build_default_sample_config(self) -> SampleConfig:
    """Create the pipeline-level default SampleConfig.

    The pipeline's ``profileSampleConfig`` (when set) is dumped and re-validated
    as the sampler's ``ProfileSampleConfig`` model. Without a source config the
    result carries no sampling config and ``randomizedSample=False``.

    Returns:
        SampleConfig: default sampling settings derived from ``self.source_config``.
    """
    source_config = self.source_config
    if not source_config:
        return SampleConfig(profileSampleConfig=None, randomizedSample=False)

    pipeline_sample_config = source_config.profileSampleConfig
    if pipeline_sample_config:
        # Round-trip through a plain dict to convert into the sampler's model
        resolved = ProfileSampleConfig.model_validate(
            pipeline_sample_config.model_dump()
        )
    else:
        resolved = None

    return SampleConfig(
        profileSampleConfig=resolved,
        randomizedSample=source_config.randomizedSample,
    )
@inject
def create_profiler_interface(
self,
@ -177,12 +190,7 @@ class ProfilerSource(ProfilerSourceInterface):
schema_entity=schema_entity,
database_entity=database_entity,
table_config=config,
default_sample_config=SampleConfig(
profileSample=self.source_config.profileSample,
profileSampleType=self.source_config.profileSampleType,
samplingMethodType=self.source_config.samplingMethodType,
randomizedSample=self.source_config.randomizedSample,
),
default_sample_config=self._build_default_sample_config(),
# TODO: Change this when we have the processing engine configuration implemented. Right now it does nothing.
processing_engine=self.get_processing_engine(self.source_config),
)

View file

@ -31,7 +31,14 @@ from metadata.profiler.config import (
get_database_profiler_config,
get_schema_profiler_config,
)
from metadata.sampler.models import DatabaseAndSchemaConfig, SampleConfig, TableConfig
from metadata.sampler.models import (
DatabaseAndSchemaConfig,
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
TableConfig,
)
def get_sample_storage_config(
@ -96,6 +103,55 @@ def get_storage_config_for_table(
return None
def _resolve_profile_sample_config(
    entity_config: Optional[Union[TableConfig, DatabaseAndSchemaConfig]],
    table_profiler_config,
    schema_profiler_config,
    database_profiler_config,
    default_sample_config: Optional[SampleConfig],
) -> Optional[ProfileSampleConfig]:
    """Pick the first usable sampling configuration from the config hierarchy.

    Candidates are inspected in argument order; the first one that yields a
    configuration wins. For each candidate the nested ``profileSampleConfig``
    is tried first, then the flat ``profileSample`` / ``profileSampleType`` /
    ``samplingMethodType`` fields, which are wrapped into a STATIC config.

    Returns:
        Optional[ProfileSampleConfig]: the resolved configuration, or ``None``
        when no candidate provides one.
    """
    candidates = (
        entity_config,
        table_profiler_config,
        schema_profiler_config,
        database_profiler_config,
        default_sample_config,
    )
    for candidate in candidates:
        if not candidate:
            continue

        # 1) Nested profileSampleConfig. Candidates lacking the attribute are
        #    skipped through the AttributeError handler.
        try:
            nested = candidate.profileSampleConfig
            if nested:
                # Unwrap models that expose their payload through `.root`
                payload = nested.root if hasattr(nested, "root") else nested
                if isinstance(payload, ProfileSampleConfig):
                    return payload
                if hasattr(payload, "model_dump"):
                    payload = payload.model_dump()
                return ProfileSampleConfig.model_validate(payload)
        except AttributeError:
            pass

        # 2) Flat fields, wrapped into a STATIC configuration.
        try:
            if candidate.profileSample:
                static_config = StaticSamplingConfig(
                    profileSample=candidate.profileSample,
                    profileSampleType=candidate.profileSampleType,
                    samplingMethodType=candidate.samplingMethodType,
                )
                return ProfileSampleConfig(
                    sampleConfigType=ProfileSampleConfigType.STATIC,
                    config=static_config,
                )
        except AttributeError:
            pass

    return None
def get_profile_sample_config(
entity: Table,
schema_entity: Optional[DatabaseSchema],
@ -109,25 +165,15 @@ def get_profile_sample_config(
database_entity=database_entity
)
for config in (
entity_config,
entity.tableProfilerConfig,
schema_profiler_config,
database_profiler_config,
default_sample_config,
):
try:
if config and config.profileSample:
return SampleConfig(
profileSample=config.profileSample,
profileSampleType=config.profileSampleType,
samplingMethodType=config.samplingMethodType,
randomizedSample=config.randomizedSample,
)
except AttributeError:
pass
profile_sample_config = _resolve_profile_sample_config(
entity_config=entity_config,
table_profiler_config=entity.tableProfilerConfig,
schema_profiler_config=schema_profiler_config,
database_profiler_config=database_profiler_config,
default_sample_config=default_sample_config,
)
return SampleConfig()
return SampleConfig(profileSampleConfig=profile_sample_config)
def get_sample_query(

View file

@ -11,30 +11,76 @@
"""
Sampling Models
"""
from enum import Enum
from typing import Any, List, Optional, Union
from pydantic import Field, model_validator
from pydantic import Field, field_validator, model_validator
from typing_extensions import Annotated
from metadata.config.common import ConfigModel
from metadata.generated.schema.entity.data.table import (
ColumnProfilerConfig,
PartitionProfilerConfig,
ProfileSampleType,
SamplingMethodType,
Table,
TableData,
)
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
SampleDataStorageConfig,
)
from metadata.generated.schema.type.basic import FullyQualifiedEntityName
from metadata.generated.schema.type.basic import (
FullyQualifiedEntityName,
ProfileSampleType,
SamplingMethodType,
)
from metadata.ingestion.models.custom_pydantic import BaseModel
from metadata.ingestion.models.table_metadata import ColumnTag
from metadata.pii.types import ClassifiableEntityType
class ProfileSampleConfigType(str, Enum):
    """Kind of sampling configuration carried by a ProfileSampleConfig."""

    STATIC = "STATIC"
    DYNAMIC = "DYNAMIC"
class DynamicSamplingThreshold(ConfigModel):
    """One threshold entry of a dynamic sampling configuration."""

    # Row count this entry is keyed on; presumably the table size from which the
    # entry applies -- confirm against the sampler that consumes it.
    rowCountThreshold: int
    # Sample size, read as a percentage or a row count depending on profileSampleType
    profileSample: Union[float, int]
    # Defaults to PERCENTAGE when not provided
    profileSampleType: Optional[ProfileSampleType] = ProfileSampleType.PERCENTAGE
    samplingMethodType: Optional[SamplingMethodType] = None
class DynamicSamplingConfig(ConfigModel):
    """Dynamic sampling settings expressed as a list of row-count thresholds."""

    thresholds: Optional[List[DynamicSamplingThreshold]] = None

    @field_validator("thresholds")
    @classmethod
    def sort_thresholds_descending(
        cls, v: Optional[List[DynamicSamplingThreshold]]
    ) -> Optional[List[DynamicSamplingThreshold]]:
        """Return the thresholds ordered by ``rowCountThreshold``, largest first.

        ``None`` is passed through unchanged.
        """
        if v is None:
            return v
        return sorted(v, key=lambda entry: entry.rowCountThreshold, reverse=True)
class StaticSamplingConfig(ConfigModel):
    """Fixed sampling settings: one sample size, its unit and the sampling method."""

    # Sample size; None when no sampling has been configured
    profileSample: Optional[Union[float, int]] = None
    # Defaults to PERCENTAGE when not provided
    profileSampleType: Optional[ProfileSampleType] = ProfileSampleType.PERCENTAGE
    samplingMethodType: Optional[SamplingMethodType] = None
class ProfileSampleConfig(ConfigModel):
    """Sampling configuration wrapper holding either static or dynamic settings."""

    # Declares which kind of payload `config` holds; STATIC by default
    sampleConfigType: ProfileSampleConfigType = ProfileSampleConfigType.STATIC
    # Payload itself; the dynamic model is listed first in the union
    config: Optional[Union[DynamicSamplingConfig, StaticSamplingConfig]] = None
class BaseProfileConfig(ConfigModel):
"""base profile config"""
@ -43,7 +89,8 @@ class BaseProfileConfig(ConfigModel):
profileSampleType: Optional[ProfileSampleType] = None
samplingMethodType: Optional[SamplingMethodType] = None
sampleDataCount: Optional[int] = 100
randomizedSample: Optional[bool] = False
randomizedSample: Optional[bool] = True
profileSampleConfig: Optional[ProfileSampleConfig] = None
class ColumnConfig(ConfigModel):
@ -71,6 +118,7 @@ class TableConfig(BaseProfileConfig):
profileSampleType=config.profileSampleType,
sampleDataCount=config.sampleDataCount,
samplingMethodType=config.samplingMethodType,
profileSampleConfig=config.profileSampleConfig,
)
return table_config
@ -125,7 +173,13 @@ class SamplerResponse(ConfigModel):
class SampleConfig(ConfigModel):
"""Profile Sample Config"""
profileSample: Optional[Union[float, int]] = None
profileSampleType: Optional[ProfileSampleType] = ProfileSampleType.PERCENTAGE
samplingMethodType: Optional[SamplingMethodType] = None
randomizedSample: Optional[bool] = False
profileSampleConfig: Optional[ProfileSampleConfig] = None
randomizedSample: Optional[bool] = True
def get_static_config(self) -> Optional[StaticSamplingConfig]:
    """Return the static sampling settings held by ``profileSampleConfig``.

    Returns:
        Optional[StaticSamplingConfig]: the nested config when it is set and is
        a ``StaticSamplingConfig``; ``None`` in every other case.
    """
    wrapper = self.profileSampleConfig
    if not wrapper:
        return None
    inner = wrapper.config
    if inner and isinstance(inner, StaticSamplingConfig):
        return inner
    return None

View file

@ -11,7 +11,8 @@
"""NoSQL Sampler"""
from typing import Dict, List, Optional, Tuple
from metadata.generated.schema.entity.data.table import ProfileSampleType, TableData
from metadata.generated.schema.entity.data.table import TableData
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.profiler.adaptors.factory import factory
from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
from metadata.sampler.sampler_interface import SamplerInterface
@ -87,10 +88,11 @@ class NoSQLSampler(SamplerInterface):
def _get_limit(self) -> Optional[int]:
    """Compute how many items to fetch for the sample.

    Returns:
        Optional[int]: for a PERCENTAGE static config, that percentage of the
        item count reported by the client (100% when no sample value is set);
        for a ROWS static config, the configured sample value; otherwise
        ``SAMPLE_DATA_DEFAULT_COUNT``.
    """
    num_rows = self.client.item_count(self.raw_dataset)
    static = self.sample_config.get_static_config()
    if static and static.profileSampleType == ProfileSampleType.PERCENTAGE:
        # The fallback must be parenthesised: `x or 100 / 100` parses as
        # `x or (100 / 100)`, which used the raw percentage as a multiplier.
        percentage = static.profileSample or 100
        limit = int(num_rows * percentage / 100)
    elif static and static.profileSampleType == ProfileSampleType.ROWS:
        limit = static.profileSample
    else:
        limit = SAMPLE_DATA_DEFAULT_COUNT
    return limit

View file

@ -20,11 +20,8 @@ from typing import Callable, Iterator, List, Optional
import pandas as pd
from metadata.generated.schema.entity.data.table import (
DataType,
ProfileSampleType,
TableData,
)
from metadata.generated.schema.entity.data.table import DataType, TableData
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.ingestion.source.database.burstiq.client import BurstIQClient
from metadata.sampler.sampler_interface import SamplerInterface
from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
@ -81,8 +78,9 @@ class BurstIQSampler(SamplerInterface):
return self._cached_frames
chain = self.entity.name.root
sample = self.sample_config.profileSample
sample_type = self.sample_config.profileSampleType
static = self.sample_config.get_static_config()
sample = static.profileSample if static else None
sample_type = static.profileSampleType if static else None
if sample and sample_type == ProfileSampleType.ROWS:
total_limit: Optional[int] = int(sample)

View file

@ -16,9 +16,9 @@ from typing import Callable, List, Optional, cast
from metadata.generated.schema.entity.data.table import (
PartitionProfilerConfig,
ProfileSampleType,
TableData,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.mixins.pandas.pandas_mixin import PandasInterfaceMixin
from metadata.sampler.sampler_interface import SamplerInterface
from metadata.utils.datalake.datalake_utils import GenericDataFrameColumnParser
@ -107,13 +107,15 @@ class DatalakeSampler(SamplerInterface, PandasInterfaceMixin):
if self.partition_details:
raw_dataset = self._partitioned_table()
if not self.sample_config.profileSample:
return raw_dataset
static = self.sample_config.get_static_config()
if (
self.sample_config.profileSample == 100
and self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE
and self.sample_config.randomizedSample is not True
not static
or not static.profileSample
or (
static.profileSample == 100
and static.profileSampleType == ProfileSampleType.PERCENTAGE
and self.sample_config.randomizedSample is not True
)
):
return raw_dataset
return self.get_sampled_dataframe(raw_dataset, self.sample_config)

View file

@ -37,14 +37,15 @@ class AzureSQLSampler(SQASampler):
Args:
selectable (Table): _description_
"""
static = self.sample_config.get_static_config()
if self.entity.tableType != TableType.View:
if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE:
if static and static.profileSampleType == ProfileSampleType.PERCENTAGE:
return selectable.tablesample(
text(f"{self.sample_config.profileSample or 100} PERCENT")
text(f"{static.profileSample or 100} PERCENT")
)
return selectable.tablesample(
text(f"{int(self.sample_config.profileSample or 100)} ROWS")
text(f"{int(static.profileSample or 100 if static else 100)} ROWS")
)
return selectable

View file

@ -20,11 +20,7 @@ from sqlalchemy import Table as SqaTable
from sqlalchemy import text
from sqlalchemy.orm import Query
from metadata.generated.schema.entity.data.table import (
ProfileSampleType,
Table,
TableType,
)
from metadata.generated.schema.entity.data.table import Table, TableType
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
DataStorageConfig,
)
@ -33,6 +29,7 @@ from metadata.generated.schema.entity.services.connections.database.datalakeConn
)
from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
from metadata.generated.schema.security.credentials.gcpValues import SingleProjectId
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.ingestion.connections.session import create_and_bind_thread_safe_session
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.sampler.models import SampleConfig
@ -96,12 +93,14 @@ class BigQuerySampler(SQASampler):
Args:
selectable (Table): Table object
"""
static = self.sample_config.get_static_config()
if (
self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE
static
and static.profileSampleType == ProfileSampleType.PERCENTAGE
and self.raw_dataset_type != TableType.View
):
return selectable.tablesample(
text(f"{self.sample_config.profileSample or 100} PERCENT")
text(f"{static.profileSample or 100} PERCENT")
)
return selectable
@ -136,8 +135,10 @@ class BigQuerySampler(SQASampler):
def get_sample_query(self, *, column=None) -> Query:
"""get query for sample data"""
# TABLESAMPLE SYSTEM is not supported for views
static = self.sample_config.get_static_config()
if (
self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE
static
and static.profileSampleType == ProfileSampleType.PERCENTAGE
and self.raw_dataset_type != TableType.View
):
return self._base_sample_query(column).cte(

View file

@ -17,7 +17,8 @@ for the profiler
from sqlalchemy import Table, text
from sqlalchemy.sql.selectable import CTE
from metadata.generated.schema.entity.data.table import ProfileSampleType, TableType
from metadata.generated.schema.entity.data.table import TableType
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.sampler.sqlalchemy.sampler import SQASampler
@ -32,14 +33,15 @@ class MssqlSampler(SQASampler):
Args:
selectable (Table): _description_
"""
static = self.sample_config.get_static_config()
if self.entity.tableType != TableType.View:
if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE:
if static and static.profileSampleType == ProfileSampleType.PERCENTAGE:
return selectable.tablesample(
text(f"{self.sample_config.profileSample or 100} PERCENT")
text(f"{static.profileSample or 100} PERCENT")
)
return selectable.tablesample(
text(f"{int(self.sample_config.profileSample or 100)} ROWS")
text(f"{int(static.profileSample or 100 if static else 100)} ROWS")
)
return selectable

View file

@ -17,7 +17,7 @@ from sqlalchemy import Table as SqaTable
from sqlalchemy import func
from sqlalchemy.orm import Query
from metadata.generated.schema.entity.data.table import ProfileSampleType, Table
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
DataStorageConfig,
)
@ -25,6 +25,7 @@ from metadata.generated.schema.entity.services.connections.database.datalakeConn
DatalakeConnection,
)
from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.sampler.models import SampleConfig
from metadata.sampler.sqlalchemy.sampler import SQASampler
@ -64,26 +65,25 @@ class PostgresSampler(SQASampler):
)
self.sampling_fn = func.bernoulli
self.sampling_method_type = SamplingMethodType.BERNOULLI
if (
sample_config
and sample_config.samplingMethodType == SamplingMethodType.SYSTEM
):
self.sampling_fn = func.system
if sample_config:
static = sample_config.get_static_config()
if static and static.samplingMethodType == SamplingMethodType.SYSTEM:
self.sampling_fn = func.system
def set_tablesample(self, selectable: SqaTable):
"""Set the TABLESAMPLE clause for postgres
Args:
selectable (Table): _description_
"""
if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE:
return selectable.tablesample(
self.sampling_fn(self.sample_config.profileSample or 100)
)
static = self.sample_config.get_static_config()
if static and static.profileSampleType == ProfileSampleType.PERCENTAGE:
return selectable.tablesample(self.sampling_fn(static.profileSample or 100))
return selectable
def get_sample_query(self, *, column=None) -> Query:
if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE:
static = self.sample_config.get_static_config()
if static and static.profileSampleType == ProfileSampleType.PERCENTAGE:
return self._base_sample_query(column).cte(
f"{self.get_sampler_table_name()}_rnd"
)

View file

@ -23,9 +23,9 @@ from sqlalchemy.sql.sqltypes import Enum
from metadata.generated.schema.entity.data.table import (
PartitionProfilerConfig,
ProfileSampleType,
TableData,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.ingestion.connections.session import create_and_bind_thread_safe_session
from metadata.mixins.sqalchemy.sqa_mixin import SQAInterfaceMixin
from metadata.profiler.orm.functions.modulo import ModuloFn
@ -157,19 +157,23 @@ class SQASampler(SamplerInterface, SQAInterfaceMixin):
# NOTE(review): this span reads like a rendered diff (no +/- markers, indentation
# stripped, one `@ ... @` hunk header in the middle). Statements that read
# `self.sample_config.profileSample*` and statements that read `static.*` appear
# as alternating variants of the same logic; lines between the two hunks are not
# shown. Comments below describe only what each visible line does.
def get_sample_query(self, *, column=None) -> Query:
"""get query for sample data"""
# `static` is whatever `get_static_config()` returns; every use below guards
# against it being falsy.
static = self.sample_config.get_static_config()
with self.session_factory() as client:
# Two variants of the percentage test: flat field vs. guarded `static` field.
if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE:
if static and static.profileSampleType == ProfileSampleType.PERCENTAGE:
# Build a CTE named "<sampler table>_rnd" whose extra column is
# ModuloFn(RandomNumFn(), 100) labelled RANDOM_LABEL.
rnd = self._base_sample_query(
column,
(ModuloFn(RandomNumFn(), 100)).label(RANDOM_LABEL),
).cte(f"{self.get_sampler_table_name()}_rnd")
session_query = client.query(rnd)
# Keep rows whose `random` column is <= the configured sample value
# (assumes RANDOM_LABEL == "random", since the column is read as
# `rnd.c.random` — confirm). Two variants of the filter follow.
query = session_query.where(
rnd.c.random <= self.sample_config.profileSample
session_query = session_query.where(
rnd.c.random <= static.profileSample
)
# Variant 1: order by the random column whenever randomizedSample is True.
if self.sample_config.randomizedSample is True:
query = query.order_by(rnd.c.random)
return query.cte(f"{self.get_sampler_table_name()}_sample")
# Variant 2: order by the random column only when profileSample == 100 AND
# randomizedSample is True.
# NOTE(review): this is narrower than variant 1 — with randomizedSample set
# and a sample below 100 the ORDER BY is no longer applied. Confirm that
# this behaviour change is intended.
if (
static.profileSample == 100
and self.sample_config.randomizedSample is True
):
session_query = session_query.order_by(rnd.c.random)
return session_query.cte(f"{self.get_sampler_table_name()}_sample")
# Reached when the percentage branch did not return.
table_query = client.query(self.raw_dataset)
if self.partition_details:
@ -185,7 +189,7 @@ class SQASampler(SamplerInterface, SQAInterfaceMixin):
if self.sample_config.randomizedSample is True
else session_query
)
# Two variants of the final LIMIT: the flat field, then the guarded `static`
# field, which passes None when `static` is falsy.
# NOTE(review): `limit(None)` presumably means "no LIMIT" in SQLAlchemy, so a
# missing static config would return every row in the "_rnd" CTE — confirm.
return query.limit(self.sample_config.profileSample).cte(
return query.limit(static.profileSample if static else None).cte(
f"{self.get_sampler_table_name()}_rnd"
)
@ -197,16 +201,15 @@ class SQASampler(SamplerInterface, SQAInterfaceMixin):
if self.sample_query:
return self._rdn_sample_from_user_query()
if not self.sample_config.profileSample:
if self.partition_details:
return self._partitioned_table()
return self.raw_dataset
static = self.sample_config.get_static_config()
if (
self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE
and self.sample_config.profileSample == 100
and self.sample_config.randomizedSample is not True
not static
or not static.profileSample
or (
static.profileSampleType == ProfileSampleType.PERCENTAGE
and static.profileSample == 100
and self.sample_config.randomizedSample is not True
)
):
if self.partition_details:
return self._partitioned_table()

View file

@ -18,10 +18,6 @@ from typing import Dict, Optional, Union
from sqlalchemy import Table, func, text
from sqlalchemy.sql.selectable import CTE
from metadata.generated.schema.entity.data.table import (
ProfileSampleType,
SamplingMethodType,
)
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
DataStorageConfig,
)
@ -29,6 +25,7 @@ from metadata.generated.schema.entity.services.connections.database.datalakeConn
DatalakeConnection,
)
from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
from metadata.generated.schema.type.basic import ProfileSampleType, SamplingMethodType
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.sampler.models import SampleConfig
from metadata.sampler.sqlalchemy.sampler import SQASampler
@ -66,24 +63,24 @@ class SnowflakeSampler(SQASampler):
**kwargs,
)
self.sampling_method_type = func.bernoulli
if (
sample_config
and sample_config.samplingMethodType == SamplingMethodType.SYSTEM
):
self.sampling_method_type = func.system
if sample_config:
static = sample_config.get_static_config()
if static and static.samplingMethodType == SamplingMethodType.SYSTEM:
self.sampling_method_type = func.system
# NOTE(review): this text reads like a diff rendered without +/- markers and with
# indentation stripped. Lines reading `self.sample_config.profileSample*` and
# lines reading `static.*` are interleaved variants of the same statements;
# presumably the `static` ones are the added side — confirm.
def set_tablesample(self, selectable: Table):
"""Set the TABLESAMPLE clause for Snowflake
Args:
selectable (Table): _description_
"""
# Percentage test, in two variants: the flat field on `self.sample_config`, and
# the field on the object returned by `get_static_config()` (guarded so that a
# falsy `static` does not take this branch).
if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE:
static = self.sample_config.get_static_config()
if static and static.profileSampleType == ProfileSampleType.PERCENTAGE:
# `self.sampling_method_type` is called with the sample value here; the lines
# just above this method assign it func.bernoulli or func.system.
# `or 100` substitutes 100 whenever profileSample is falsy (None or 0).
return selectable.tablesample(
self.sampling_method_type(self.sample_config.profileSample or 100)
self.sampling_method_type(static.profileSample or 100)
)
# Fallback for everything that is not percentage sampling: sample a fixed number
# of rows via func.ROW(text("<n> ROWS")).
# In the `static` variant the conditional expression binds loosest, so the
# f-string evaluates `(static.profileSample or 100) if static else 100`; a falsy
# `static` therefore renders "100 ROWS".
# NOTE(review): this return is unconditional, so a TABLESAMPLE of 100 rows is
# applied even when no static config exists — confirm that is intended.
return selectable.tablesample(
func.ROW(text(f"{self.sample_config.profileSample or 100} ROWS"))
func.ROW(text(f"{static.profileSample or 100 if static else 100} ROWS"))
)
def get_sample_query(self, *, column=None) -> CTE:

View file

@ -23,13 +23,12 @@ from metadata.data_quality.api.models import TestCaseDefinition
from metadata.generated.schema.entity.data.table import (
ColumnProfile,
DmlOperationType,
ProfileSampleType,
SystemProfile,
TableProfilerConfig,
)
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
from metadata.generated.schema.tests.testCase import TestCaseParameterValue
from metadata.generated.schema.type.basic import Timestamp
from metadata.generated.schema.type.basic import ProfileSampleType, Timestamp
from .common.test_cli_db import CliCommonDB
from .common_e2e_sqa_mixins import SQACommonMethods

View file

@ -15,11 +15,7 @@ from sqlalchemy.sql import sqltypes
from _openmetadata_testutils.postgres.conftest import postgres_container
from _openmetadata_testutils.pydantic.test_utils import assert_equal_pydantic_objects
from metadata.data_quality.api.models import TestCaseDefinition
from metadata.generated.schema.entity.data.table import (
ProfileSampleType,
Table,
TableProfilerConfig,
)
from metadata.generated.schema.entity.data.table import Table, TableProfilerConfig
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.metadataIngestion.testSuitePipeline import (
TestSuiteConfigType,
@ -30,6 +26,7 @@ from metadata.generated.schema.tests.basic import (
TestResultValue,
)
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
from metadata.generated.schema.type.samplingConfig import ProfileSampleConfig
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.data_quality import TestSuiteWorkflow
@ -93,8 +90,13 @@ class TestParameters(BaseModel):
passedRows=IsApprox(59, delta=60) & IsPositiveInt,
),
TableProfilerConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=10,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 10,
"profileSampleType": "PERCENTAGE",
},
),
),
),
(
@ -118,8 +120,13 @@ class TestParameters(BaseModel):
passedRows=IsApprox(10, delta=15) & IsPositiveInt,
),
TableProfilerConfig(
profileSampleType=ProfileSampleType.ROWS,
profileSample=10,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 10,
"profileSampleType": "ROWS",
},
),
),
),
(
@ -349,8 +356,13 @@ class TestParameters(BaseModel):
testCaseStatus=TestCaseStatus.Success,
),
TableProfilerConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=10,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 10,
"profileSampleType": "PERCENTAGE",
},
),
),
),
(

View file

@ -18,10 +18,10 @@ import pytest
from metadata.generated.schema.entity.data.table import (
PartitionIntervalTypes,
ProfileSampleType,
TableProfilerConfig,
)
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.sampler.models import PartitionProfilerConfig
from metadata.workflow.classification import AutoClassificationWorkflow
from metadata.workflow.data_quality import TestSuiteWorkflow

View file

@ -197,7 +197,7 @@ PROFILER_INGESTION_CONFIG_TEMPLATE = dedent(
"serviceConnection": {{
"config": {service_config}
}},
"sourceConfig": {{"config": {{"type":"Profiler", "profileSample": 100}}}}
"sourceConfig": {{"config": {{"type":"Profiler", "profileSampleConfig": {{"sampleConfigType": "STATIC", "config": {{"profileSample": 100, "profileSampleType": "PERCENTAGE"}}}}}}}}
}},
"processor": {{"type": "orm-profiler", "config": {{}}}},
"sink": {{"type": "metadata-rest", "config": {{}}}},

View file

@ -53,6 +53,8 @@ from metadata.generated.schema.type.basic import (
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.generated.schema.type.entityReferenceList import EntityReferenceList
from metadata.generated.schema.type.samplingConfig import ProfileSampleConfig
from metadata.generated.schema.type.staticSamplingConfig import StaticSamplingConfig
from metadata.generated.schema.type.usageRequest import UsageRequest
from metadata.ingestion.ometa.client import REST
@ -447,7 +449,9 @@ class TestOMetaTableAPI:
table = metadata.get_latest_table_profile(expected_fqn)
assert table.profile == table_profile
assert table.profile.timestamp == table_profile.timestamp
assert table.profile.columnCount == table_profile.columnCount
assert table.profile.rowCount == table_profile.rowCount
res_column_profile = next(
(col.profile for col in table.columns if col.name.root == "id")
@ -620,13 +624,21 @@ class TestOMetaTableAPI:
assert table.tableProfilerConfig is None
metadata._create_or_update_table_profiler_config(
table.id, table_profiler_config=TableProfilerConfig(profileSample=50.0)
table.id,
table_profiler_config=TableProfilerConfig(
profileSampleConfig=ProfileSampleConfig(
config=StaticSamplingConfig(profileSample=50.0)
)
),
)
stored = metadata.get_by_name(
entity=Table, fqn=table.fullyQualifiedName, fields=["tableProfilerConfig"]
)
assert stored.tableProfilerConfig.profileSample == 50.0
assert (
stored.tableProfilerConfig.profileSampleConfig.root.config.profileSample
== 50.0
)
def test_list_w_skip_on_failure(self, metadata):
"""

View file

@ -25,11 +25,7 @@ import pytest
from sqlalchemy import Column, DateTime, Integer, String, create_engine
from sqlalchemy.orm import DeclarativeBase
from metadata.generated.schema.entity.data.table import (
ColumnProfile,
ProfileSampleType,
Table,
)
from metadata.generated.schema.entity.data.table import ColumnProfile, Table
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
@ -37,6 +33,7 @@ from metadata.generated.schema.entity.services.databaseService import DatabaseSe
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
OpenMetadataJWTClientConfig,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.ingestion.connections.session import create_and_bind_session
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.utils.time_utils import (
@ -283,7 +280,7 @@ def test_profiler_workflow(ingest, metadata, service_name):
assert not table.tableProfilerConfig
assert profile.profileSample == 75.0
assert profile.profileSampleType == ProfileSampleType.PERCENTAGE
assert profile.profileSampleType.root == ProfileSampleType.PERCENTAGE
workflow_config["processor"]["config"]["tableConfig"][0][
"profileSampleType"
@ -307,7 +304,7 @@ def test_profiler_workflow(ingest, metadata, service_name):
assert not table.tableProfilerConfig
assert profile.profileSample == 3.0
assert profile.rowCount == 4.0
assert profile.profileSampleType == ProfileSampleType.ROWS
assert profile.profileSampleType.root == ProfileSampleType.ROWS
def test_workflow_sample_profile(ingest, metadata, service_name):
@ -316,7 +313,13 @@ def test_workflow_sample_profile(ingest, metadata, service_name):
workflow_config["source"]["sourceConfig"]["config"].update(
{
"type": "Profiler",
"profileSample": 50,
"profileSampleConfig": {
"sampleConfigType": "STATIC",
"config": {
"profileSample": 50,
"profileSampleType": "PERCENTAGE",
},
},
"tableFilterPattern": {"includes": ["newUsers"]},
}
)

View file

@ -35,7 +35,6 @@ from metadata.generated.schema.entity.data.table import (
DataType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
TableProfilerConfig,
)
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
@ -51,6 +50,7 @@ from metadata.generated.schema.entity.services.databaseService import (
DatabaseServiceType,
)
from metadata.generated.schema.tests.testCase import TestCase
from metadata.generated.schema.type.samplingConfig import ProfileSampleConfig
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.data_quality import TestSuiteWorkflow
@ -318,8 +318,13 @@ class TestE2EWorkflow(unittest.TestCase):
self.metadata.create_or_update_table_profiler_config(
fqn=fqn,
table_profiler_config=TableProfilerConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=50.0,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 50.0,
"profileSampleType": "PERCENTAGE",
},
),
),
)
@ -382,8 +387,13 @@ class TestE2EWorkflow(unittest.TestCase):
self.metadata.create_or_update_table_profiler_config(
fqn=fqn,
table_profiler_config=TableProfilerConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=100.0,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 100.0,
"profileSampleType": "PERCENTAGE",
},
),
partitioning=PartitionProfilerConfig(
enablePartitioning=True,
partitionIntervalType=PartitionIntervalTypes.COLUMN_VALUE,

View file

@ -13,13 +13,14 @@ from metadata.data_quality.validations.table.sqlalchemy.tableDiff import (
from metadata.generated.schema.entity.data.table import (
Column,
DataType,
ProfileSampleType,
TableProfilerConfig,
)
from metadata.generated.schema.entity.services.databaseService import (
DatabaseServiceType,
)
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.generated.schema.type.samplingConfig import ProfileSampleConfig
@pytest.mark.parametrize(
@ -49,8 +50,13 @@ def test_compile_and_clauses(elements, expected):
**{
"database_service_type": "BigQuery",
"table_profile_config": TableProfilerConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=10,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 10,
"profileSampleType": "PERCENTAGE",
},
),
),
"table1": TableParameter.model_construct(
**{
@ -82,8 +88,13 @@ def test_compile_and_clauses(elements, expected):
**{
"database_service_type": "BigQuery",
"table_profile_config": TableProfilerConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=20,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 20,
"profileSampleType": "PERCENTAGE",
},
),
),
"table1": TableParameter.model_construct(
**{
@ -115,8 +126,13 @@ def test_compile_and_clauses(elements, expected):
**{
"database_service_type": "BigQuery",
"table_profile_config": TableProfilerConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=10,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 10,
"profileSampleType": "PERCENTAGE",
},
),
),
"table1": TableParameter.model_construct(
**{
@ -148,8 +164,13 @@ def test_compile_and_clauses(elements, expected):
**{
"database_service_type": "BigQuery",
"table_profile_config": TableProfilerConfig(
profileSampleType=ProfileSampleType.ROWS,
profileSample=20,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 20,
"profileSampleType": "ROWS",
},
),
),
"table1": TableParameter.model_construct(
**{
@ -180,8 +201,13 @@ def test_compile_and_clauses(elements, expected):
TableDiffRuntimeParameters.model_construct(
**{
"table_profile_config": TableProfilerConfig(
profileSampleType=ProfileSampleType.ROWS,
profileSample=20,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 20,
"profileSampleType": "ROWS",
},
),
),
"table1": TableParameter.model_construct(
**{
@ -253,10 +279,12 @@ def test_sample_where_clauses(config, expected):
None,
)
validator.runtime_params = config
if (
config.table_profile_config
and config.table_profile_config.profileSampleType == ProfileSampleType.ROWS
):
table_profile_config = config.table_profile_config if config else None
profile_sample_config = (
table_profile_config.profileSampleConfig.root if table_profile_config else None
)
sample_config = profile_sample_config.config if profile_sample_config else None
if sample_config and sample_config.profileSampleType == ProfileSampleType.ROWS:
validator.get_total_row_count = Mock(return_value=10_000)
with patch("random.choices", Mock(return_value=["a"])):
assert validator.sample_where_clause() == expected

View file

@ -34,7 +34,12 @@ from metadata.profiler.interface.pandas.profiler_interface import (
from metadata.profiler.metrics.registry import Metrics
from metadata.profiler.processor.core import Profiler
from metadata.readers.dataframe.models import DatalakeColumnWrapper
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.pandas.sampler import DatalakeSampler
@ -175,7 +180,12 @@ class DatalakeSampleTest(TestCase):
service_connection_config=DatalakeConnection(configSource={}),
ometa_client=None,
entity=cls.table_entity,
sample_config=SampleConfig(profileSample=50.0),
sample_config=SampleConfig(
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(profileSample=50.0),
)
),
)
cls.datalake_profiler_interface = PandasProfilerInterface(
service_connection_config=DatalakeConnection(configSource={}),
@ -211,7 +221,12 @@ class DatalakeSampleTest(TestCase):
service_connection_config=DatalakeConnection(configSource={}),
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(profileSample=50.0),
sample_config=SampleConfig(
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(profileSample=50.0),
)
),
)
random_sample = sampler.get_dataset()
res = sum(len(r) for r in random_sample())
@ -245,7 +260,12 @@ class DatalakeSampleTest(TestCase):
service_connection_config=DatalakeConnection(configSource={}),
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(profileSample=50.0),
sample_config=SampleConfig(
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(profileSample=50.0),
)
),
)
datalake_profiler_interface = PandasProfilerInterface(
service_connection_config=DatalakeConnection(configSource={}),
@ -326,7 +346,12 @@ class DatalakeSampleTest(TestCase):
service_connection_config=DatalakeConnection(configSource={}),
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(profileSample=50.0),
sample_config=SampleConfig(
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(profileSample=50.0),
)
),
)
sample_data = sampler.fetch_sample_data()
@ -358,7 +383,12 @@ class DatalakeSampleTest(TestCase):
service_connection_config=DatalakeConnection(configSource={}),
ometa_client=None,
entity=self.table_entity,
default_sample_config=SampleConfig(profileSample=50.0),
default_sample_config=SampleConfig(
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(profileSample=50.0),
)
),
sample_query="`age` > 30",
)
sample_data = sampler.fetch_sample_data()

View file

@ -21,16 +21,21 @@ from metadata.generated.schema.entity.data.table import (
DataType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
Table,
)
from metadata.generated.schema.entity.services.connections.database.azureSQLConnection import (
AzureSQLConnection,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
SQAProfilerInterface,
)
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.sqlalchemy.azuresql.sampler import AzureSQLSampler
from metadata.sampler.sqlalchemy.sampler import SQASampler
@ -93,7 +98,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -116,7 +127,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.ROWS, profileSample=50
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50,
profileSampleType=ProfileSampleType.ROWS,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -139,8 +156,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=50.0,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
partition_details=PartitionProfilerConfig(
enablePartitioning=True,

View file

@ -12,7 +12,6 @@ from metadata.generated.schema.entity.data.table import (
DataType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
Table,
)
from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
@ -22,12 +21,18 @@ from metadata.generated.schema.security.credentials.gcpCredentials import GCPCre
from metadata.generated.schema.security.credentials.gcpValues import (
GcpCredentialsValues,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
SQAProfilerInterface,
)
from metadata.profiler.orm.functions.table_metric_computer import TableType
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.sqlalchemy.bigquery.sampler import BigQuerySampler
from metadata.sampler.sqlalchemy.sampler import SQASampler
@ -115,7 +120,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
table_type=TableType.Regular,
)
@ -150,7 +161,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=view_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -186,7 +203,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=view_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
partition_details=PartitionProfilerConfig(
enablePartitioning=True,
@ -216,7 +239,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
partition_details=PartitionProfilerConfig(
enablePartitioning=True,

View file

@ -12,16 +12,21 @@ from metadata.generated.schema.entity.data.table import (
DataType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
Table,
)
from metadata.generated.schema.entity.services.connections.database.mssqlConnection import (
MssqlConnection,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
SQAProfilerInterface,
)
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.sqlalchemy.mssql.sampler import MssqlSampler
from metadata.sampler.sqlalchemy.sampler import SQASampler
@ -84,7 +89,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -107,7 +118,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.ROWS, profileSample=50
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50,
profileSampleType=ProfileSampleType.ROWS,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -130,8 +147,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=50.0,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
partition_details=PartitionProfilerConfig(
enablePartitioning=True,

View file

@ -12,17 +12,21 @@ from metadata.generated.schema.entity.data.table import (
DataType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
SamplingMethodType,
Table,
)
from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
PostgresConnection,
)
from metadata.generated.schema.type.basic import ProfileSampleType, SamplingMethodType
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
SQAProfilerInterface,
)
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.sqlalchemy.postgres.sampler import PostgresSampler
from metadata.sampler.sqlalchemy.sampler import SQASampler
@ -83,8 +87,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=50.0,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -109,9 +118,14 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=50.0,
samplingMethodType=sampling_method_type,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
samplingMethodType=sampling_method_type,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -130,7 +144,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
partition_details=PartitionProfilerConfig(
enablePartitioning=True,

View file

@ -12,17 +12,21 @@ from metadata.generated.schema.entity.data.table import (
DataType,
PartitionIntervalTypes,
PartitionProfilerConfig,
ProfileSampleType,
SamplingMethodType,
Table,
)
from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import (
SnowflakeConnection,
)
from metadata.generated.schema.type.basic import ProfileSampleType, SamplingMethodType
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
SQAProfilerInterface,
)
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.sqlalchemy.sampler import SQASampler
from metadata.sampler.sqlalchemy.snowflake.sampler import SnowflakeSampler
@ -82,7 +86,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -109,9 +119,14 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=50.0,
samplingMethodType=sampling_method_type,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
samplingMethodType=sampling_method_type,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -134,7 +149,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.ROWS, profileSample=50
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50,
profileSampleType=ProfileSampleType.ROWS,
),
)
),
)
query: CTE = sampler.get_sample_query()
@ -157,8 +178,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=self.table_entity,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=50.0,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=50.0,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
)
),
partition_details=PartitionProfilerConfig(
enablePartitioning=True,

View file

@ -24,7 +24,12 @@ from sqlalchemy.orm import DeclarativeBase
from metadata.ingestion.connections.session import create_and_bind_session
from metadata.profiler.processor.runner import QueryRunner
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.sqlalchemy.sampler import SQASampler
from metadata.utils.timeout import cls_timeout
@ -92,7 +97,12 @@ class RunnerTest(TestCase):
service_connection_config=Mock(),
ometa_client=None,
entity=None,
sample_config=SampleConfig(profileSample=50.0),
sample_config=SampleConfig(
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(profileSample=50.0),
)
),
)
cls.dataset = sampler.get_dataset()

View file

@ -21,23 +21,24 @@ from sqlalchemy import TEXT, Column, Integer, String, func
from sqlalchemy.orm import DeclarativeBase
from metadata.generated.schema.entity.data.table import Column as EntityColumn
from metadata.generated.schema.entity.data.table import (
ColumnName,
DataType,
ProfileSampleType,
Table,
)
from metadata.generated.schema.entity.data.table import ColumnName, DataType, Table
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
SQLiteConnection,
SQLiteScheme,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
SQAProfilerInterface,
)
from metadata.profiler.metrics.registry import Metrics
from metadata.profiler.orm.registry import CustomTypes
from metadata.profiler.processor.core import Profiler
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.sqlalchemy.sampler import SQASampler
@ -111,7 +112,12 @@ class SampleTest(TestCase):
service_connection_config=cls.sqlite_conn,
ometa_client=None,
entity=None,
sample_config=SampleConfig(profileSample=50.0),
sample_config=SampleConfig(
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(profileSample=50.0),
)
),
)
cls.dataset = cls.sampler.get_dataset()
cls.sqa_profiler_interface = SQAProfilerInterface(
@ -357,7 +363,12 @@ class SampleTest(TestCase):
service_connection_config=self.sqlite_conn,
ometa_client=None,
entity=None,
sample_config=SampleConfig(profileSample=50.0),
sample_config=SampleConfig(
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(profileSample=50.0),
)
),
sample_query=stmt,
)
sample_data = sampler.fetch_sample_data()
@ -375,8 +386,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=None,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=100,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
),
randomizedSample=True,
),
sample_data_count=5,
@ -397,8 +413,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=None,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=100,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
),
randomizedSample=False,
),
sample_data_count=5,
@ -419,8 +440,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=None,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=100,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
),
randomizedSample=None,
),
sample_data_count=5,
@ -441,8 +467,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=None,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=100,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
),
randomizedSample=True,
),
sample_data_count=5,
@ -462,8 +493,13 @@ class SampleTest(TestCase):
ometa_client=None,
entity=None,
sample_config=SampleConfig(
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSample=100,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
),
randomizedSample=False,
),
sample_data_count=5,

View file

@ -21,20 +21,15 @@ from metadata.generated.schema.entity.data.databaseSchema import (
DatabaseSchema,
DatabaseSchemaProfilerConfig,
)
from metadata.generated.schema.entity.data.table import (
ProfileSampleType,
Table,
TableProfilerConfig,
)
from metadata.generated.schema.entity.data.table import Table, TableProfilerConfig
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
DataStorageConfig,
SampleDataStorageConfig,
)
from metadata.generated.schema.metadataIngestion.databaseServiceProfilerPipeline import (
DatabaseServiceProfilerPipeline,
)
from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.generated.schema.type.samplingConfig import ProfileSampleConfig
from metadata.profiler.api.models import DatabaseAndSchemaConfig, TableConfig
from metadata.profiler.config import (
get_database_profiler_config,
@ -63,8 +58,13 @@ class ProfilerInterfaceTest(TestCase):
columns=[],
tableProfilerConfig=TableProfilerConfig(
sampleDataCount=101,
profileSample=11,
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={
"profileSample": 11,
"profileSampleType": "PERCENTAGE",
},
),
),
service=EntityReference(
id="ba451e8a-5069-4a45-ac38-95421bbdcb5a",
@ -88,7 +88,10 @@ class ProfilerInterfaceTest(TestCase):
cls.schema_profiler_config = DatabaseSchemaProfilerConfig(
sampleDataCount=102,
profileSample=12,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={"profileSample": 12, "profileSampleType": "PERCENTAGE"},
),
sampleDataStorageConfig=cls.schema_storage_config,
)
@ -118,7 +121,10 @@ class ProfilerInterfaceTest(TestCase):
cls.database_profiler_config = DatabaseProfilerConfig(
sampleDataCount=202,
profileSample=22,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType="STATIC",
config={"profileSample": 22, "profileSampleType": "PERCENTAGE"},
),
sampleDataStorageConfig=cls.database_storage_config,
)
@ -156,66 +162,50 @@ class ProfilerInterfaceTest(TestCase):
)
def test_get_profile_sample_configs(self):
source_config = DatabaseServiceProfilerPipeline()
expected = SampleConfig(
profileSample=11,
profileSampleType=ProfileSampleType.PERCENTAGE,
)
# Pipeline has no profileSampleConfig set — resolution should fall through
# to table config which has profileSample=11
actual = get_profile_sample_config(
entity=self.table,
schema_entity=self.schema_entity,
database_entity=self.database_entity,
entity_config=None,
default_sample_config=SampleConfig(
profileSample=source_config.profileSample,
profileSampleType=source_config.profileSampleType,
samplingMethodType=source_config.samplingMethodType,
),
default_sample_config=SampleConfig(),
)
self.assertEqual(expected, actual)
static = actual.get_static_config()
self.assertIsNotNone(static)
self.assertEqual(static.profileSample, 11)
self.assertEqual(static.profileSampleType, ProfileSampleType.PERCENTAGE)
profiler = TableConfig(
profileSample=11,
profileSampleType=ProfileSampleType.PERCENTAGE,
fullyQualifiedName="demo",
)
expected = SampleConfig(
profileSample=11,
profileSampleType=ProfileSampleType.PERCENTAGE,
)
actual = get_profile_sample_config(
entity=self.table,
schema_entity=self.schema_entity,
database_entity=self.database_entity,
entity_config=profiler,
default_sample_config=SampleConfig(
profileSample=source_config.profileSample,
profileSampleType=source_config.profileSampleType,
samplingMethodType=source_config.samplingMethodType,
),
default_sample_config=SampleConfig(),
)
self.assertEqual(expected, actual)
static = actual.get_static_config()
self.assertIsNotNone(static)
self.assertEqual(static.profileSample, 11)
self.assertEqual(static.profileSampleType, ProfileSampleType.PERCENTAGE)
profiler = None
expected = SampleConfig(
profileSample=22,
profileSampleType=ProfileSampleType.PERCENTAGE,
)
table_copy = deepcopy(self.table)
table_copy.tableProfilerConfig = None
actual = get_profile_sample_config(
entity=table_copy,
schema_entity=None,
database_entity=self.database_entity,
entity_config=profiler,
default_sample_config=SampleConfig(
profileSample=source_config.profileSample,
profileSampleType=source_config.profileSampleType,
samplingMethodType=source_config.samplingMethodType,
),
entity_config=None,
default_sample_config=SampleConfig(),
)
self.assertEqual(expected, actual)
static = actual.get_static_config()
self.assertIsNotNone(static)
self.assertEqual(static.profileSample, 22)
self.assertEqual(static.profileSampleType, ProfileSampleType.PERCENTAGE)
def test_get_sample_data_count_config(self):
entity_config = TableConfig(

View file

@ -17,8 +17,13 @@ randomization; None and False both skip randomization.
"""
from unittest.mock import MagicMock, patch
from metadata.generated.schema.entity.data.table import ProfileSampleType
from metadata.sampler.models import SampleConfig
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.sampler.models import (
ProfileSampleConfig,
ProfileSampleConfigType,
SampleConfig,
StaticSamplingConfig,
)
class TestSQASampler100Pct:
@ -34,8 +39,13 @@ class TestSQASampler100Pct:
sampler = SQASampler()
sampler.sample_config = SampleConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
),
randomizedSample=randomized_sample,
)
sampler.sample_query = None
@ -81,8 +91,13 @@ class TestDatalakeSampler100Pct:
sampler = DatalakeSampler()
sampler.sample_config = SampleConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSampleConfig=ProfileSampleConfig(
sampleConfigType=ProfileSampleConfigType.STATIC,
config=StaticSamplingConfig(
profileSample=100,
profileSampleType=ProfileSampleType.PERCENTAGE,
),
),
randomizedSample=randomized_sample,
)
sampler.sample_query = None

View file

@ -25,15 +25,19 @@ from metadata.generated.schema.entity.data.table import Column as EntityColumn
from metadata.generated.schema.entity.data.table import (
ColumnName,
DataType,
ProfileSampleType,
Table,
TableData,
)
from metadata.generated.schema.entity.services.connections.database.burstIQConnection import (
BurstIQConnection,
)
from metadata.generated.schema.type.basic import ProfileSampleType
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.sampler.models import SampleConfig
from metadata.sampler.models import (
ProfileSampleConfig,
SampleConfig,
StaticSamplingConfig,
)
from metadata.sampler.pandas.burstiq.sampler import _PAGE_SIZE, BurstIQSampler
from metadata.utils.constants import SAMPLE_DATA_MAX_CELL_LENGTH
from metadata.utils.sqa_like_column import SQALikeColumn
@ -98,8 +102,12 @@ class TestBurstIQSamplerGetClient:
class TestBurstIQSamplerRawDataset:
def test_rows_sample_type_limits_to_exact_count(self, sampler, mock_client):
sampler.sample_config = SampleConfig(
profileSample=3,
profileSampleType=ProfileSampleType.ROWS,
profileSampleConfig=ProfileSampleConfig(
config=StaticSamplingConfig(
profileSample=3,
profileSampleType=ProfileSampleType.ROWS,
)
)
)
mock_client.get_records_by_tql.return_value = [
{"score": 1.0, "age": i} for i in range(3)
@ -115,8 +123,12 @@ class TestBurstIQSamplerRawDataset:
def test_percentage_sample_type_queries_chain_metrics(self, sampler, mock_client):
sampler.sample_config = SampleConfig(
profileSample=50,
profileSampleType=ProfileSampleType.PERCENTAGE,
profileSampleConfig=ProfileSampleConfig(
config=StaticSamplingConfig(
profileSample=50,
profileSampleType=ProfileSampleType.PERCENTAGE,
)
)
)
mock_client.get_chain_metrics.return_value = {"TestChain": 100}
mock_client.get_records_by_tql.return_value = [

View file

@ -24,8 +24,11 @@ import org.openmetadata.schema.entity.data.DatabaseSchema;
import org.openmetadata.schema.entity.services.DatabaseService;
import org.openmetadata.schema.type.ApiStatus;
import org.openmetadata.schema.type.EntityHistory;
import org.openmetadata.schema.type.ProfileSampleConfig;
import org.openmetadata.schema.type.StaticSamplingConfig;
import org.openmetadata.schema.type.api.BulkOperationResult;
import org.openmetadata.schema.type.csv.CsvImportResult;
import org.openmetadata.schema.utils.JsonUtils;
import org.openmetadata.sdk.client.OpenMetadataClient;
import org.openmetadata.sdk.fluent.DatabaseSchemas;
import org.openmetadata.sdk.fluent.Databases;
@ -435,9 +438,15 @@ public class DatabaseSchemaResourceIT extends BaseEntityIT<DatabaseSchema, Creat
if (i % 2 == 0) {
org.openmetadata.schema.type.DatabaseSchemaProfilerConfig profilerConfig =
new org.openmetadata.schema.type.DatabaseSchemaProfilerConfig()
.withProfileSampleType(
org.openmetadata.schema.type.TableProfilerConfig.ProfileSampleType.PERCENTAGE)
.withProfileSample(50.0);
.withProfileSampleConfig(
new ProfileSampleConfig()
.withSampleConfigType(ProfileSampleConfig.SampleConfigType.STATIC)
.withConfig(
new StaticSamplingConfig()
.withProfileSample(50.0)
.withProfileSampleType(
org.openmetadata.schema.type.TableProfile.ProfileSampleType
.PERCENTAGE)));
// Use dedicated SDK method to add profiler config
schema = client.databaseSchemas().addProfilerConfig(schema.getId(), profilerConfig);
@ -479,10 +488,15 @@ public class DatabaseSchemaResourceIT extends BaseEntityIT<DatabaseSchema, Creat
assertNotNull(
schema.getDatabaseSchemaProfilerConfig(),
"Even-indexed schema should have profiler config");
ProfileSampleConfig psc =
schema.getDatabaseSchemaProfilerConfig().getProfileSampleConfig();
assertNotNull(psc, "ProfileSampleConfig should be set");
StaticSamplingConfig staticConfig =
JsonUtils.convertValue(psc.getConfig(), StaticSamplingConfig.class);
assertEquals(
org.openmetadata.schema.type.TableProfilerConfig.ProfileSampleType.PERCENTAGE,
schema.getDatabaseSchemaProfilerConfig().getProfileSampleType());
assertEquals(50.0, schema.getDatabaseSchemaProfilerConfig().getProfileSample());
org.openmetadata.schema.type.TableProfile.ProfileSampleType.PERCENTAGE,
staticConfig.getProfileSampleType());
assertEquals(50.0, staticConfig.getProfileSample());
} else {
assertTrue(
schema.getDatabaseSchemaProfilerConfig() == null,
@ -513,8 +527,8 @@ public class DatabaseSchemaResourceIT extends BaseEntityIT<DatabaseSchema, Creat
bulkSchema.getDatabaseSchemaProfilerConfig(),
"Profiler config should be present in bulk fetch if present in individual fetch");
assertEquals(
individualSchema.getDatabaseSchemaProfilerConfig().getProfileSampleType(),
bulkSchema.getDatabaseSchemaProfilerConfig().getProfileSampleType(),
individualSchema.getDatabaseSchemaProfilerConfig().getProfileSampleConfig(),
bulkSchema.getDatabaseSchemaProfilerConfig().getProfileSampleConfig(),
"Profiler config should match");
} else {
assertTrue(

View file

@ -79,6 +79,8 @@ import org.openmetadata.schema.type.JoinedWith;
import org.openmetadata.schema.type.LineageDetails;
import org.openmetadata.schema.type.PartitionColumnDetails;
import org.openmetadata.schema.type.PartitionIntervalTypes;
import org.openmetadata.schema.type.ProfileSampleConfig;
import org.openmetadata.schema.type.StaticSamplingConfig;
import org.openmetadata.schema.type.TableConstraint;
import org.openmetadata.schema.type.TableData;
import org.openmetadata.schema.type.TableJoins;
@ -89,6 +91,7 @@ import org.openmetadata.schema.type.TableType;
import org.openmetadata.schema.type.TagLabel;
import org.openmetadata.schema.type.api.BulkOperationResult;
import org.openmetadata.schema.type.csv.CsvImportResult;
import org.openmetadata.schema.utils.JsonUtils;
import org.openmetadata.sdk.OM;
import org.openmetadata.sdk.client.OpenMetadataClient;
import org.openmetadata.sdk.fluent.DatabaseSchemas;
@ -1564,13 +1567,25 @@ public class TableResourceIT extends BaseEntityIT<Table, CreateTable> {
// Create profiler config
TableProfilerConfig config =
new TableProfilerConfig()
.withProfileSample(50.0)
.withProfileSampleType(TableProfilerConfig.ProfileSampleType.PERCENTAGE);
.withProfileSampleConfig(
new ProfileSampleConfig()
.withSampleConfigType(ProfileSampleConfig.SampleConfigType.STATIC)
.withConfig(
new StaticSamplingConfig()
.withProfileSample(50.0)
.withProfileSampleType(
org.openmetadata.schema.type.TableProfile.ProfileSampleType
.PERCENTAGE)));
// Update profiler config
Table updated = client.tables().updateProfilerConfig(table.getId(), config);
assertNotNull(updated.getTableProfilerConfig());
assertEquals(50.0, updated.getTableProfilerConfig().getProfileSample());
assertNotNull(updated.getTableProfilerConfig().getProfileSampleConfig());
StaticSamplingConfig staticConfig =
JsonUtils.convertValue(
updated.getTableProfilerConfig().getProfileSampleConfig().getConfig(),
StaticSamplingConfig.class);
assertEquals(50.0, staticConfig.getProfileSample());
}
// ===================================================================

View file

@ -13,6 +13,7 @@
package org.openmetadata.service.jdbi3;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.csv.CsvUtil.addDomains;
import static org.openmetadata.csv.CsvUtil.addExtension;
import static org.openmetadata.csv.CsvUtil.addField;
@ -54,7 +55,9 @@ import org.openmetadata.schema.type.AssetCertification;
import org.openmetadata.schema.type.DatabaseProfilerConfig;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.type.ProfileSampleConfig;
import org.openmetadata.schema.type.Relationship;
import org.openmetadata.schema.type.StaticSamplingConfig;
import org.openmetadata.schema.type.TagLabel;
import org.openmetadata.schema.type.change.ChangeSource;
import org.openmetadata.schema.type.csv.CsvDocumentation;
@ -345,11 +348,20 @@ public class DatabaseRepository extends EntityRepository<Database> {
UUID databaseId, DatabaseProfilerConfig databaseProfilerConfig) {
// Validate the request content
Database database = find(databaseId, Include.NON_DELETED);
if (databaseProfilerConfig.getProfileSampleType() != null
&& databaseProfilerConfig.getProfileSample() != null) {
EntityUtil.validateProfileSample(
databaseProfilerConfig.getProfileSampleType().toString(),
databaseProfilerConfig.getProfileSample());
ProfileSampleConfig profileSampleConfig = databaseProfilerConfig.getProfileSampleConfig();
if (!nullOrEmpty(profileSampleConfig) && !nullOrEmpty(profileSampleConfig.getConfig())) {
ProfileSampleConfig.SampleConfigType sampleConfigType =
profileSampleConfig.getSampleConfigType();
if (!nullOrEmpty(sampleConfigType)
&& sampleConfigType.equals(ProfileSampleConfig.SampleConfigType.STATIC)) {
StaticSamplingConfig staticConfig =
JsonUtils.convertValue(profileSampleConfig.getConfig(), StaticSamplingConfig.class);
if (staticConfig.getProfileSampleType() != null
&& staticConfig.getProfileSample() != null) {
EntityUtil.validateProfileSample(
staticConfig.getProfileSampleType().toString(), staticConfig.getProfileSample());
}
}
}
daoCollection

View file

@ -55,7 +55,9 @@ import org.openmetadata.schema.type.AssetCertification;
import org.openmetadata.schema.type.DatabaseSchemaProfilerConfig;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.type.ProfileSampleConfig;
import org.openmetadata.schema.type.Relationship;
import org.openmetadata.schema.type.StaticSamplingConfig;
import org.openmetadata.schema.type.TagLabel;
import org.openmetadata.schema.type.change.ChangeSource;
import org.openmetadata.schema.type.csv.CsvDocumentation;
@ -726,11 +728,20 @@ public class DatabaseSchemaRepository extends EntityRepository<DatabaseSchema> {
// Validate the request content
DatabaseSchema databaseSchema = find(databaseSchemaId, Include.NON_DELETED);
if (databaseSchemaProfilerConfig.getProfileSampleType() != null
&& databaseSchemaProfilerConfig.getProfileSample() != null) {
EntityUtil.validateProfileSample(
databaseSchemaProfilerConfig.getProfileSampleType().toString(),
databaseSchemaProfilerConfig.getProfileSample());
ProfileSampleConfig profileSampleConfig = databaseSchemaProfilerConfig.getProfileSampleConfig();
if (!nullOrEmpty(profileSampleConfig) && !nullOrEmpty(profileSampleConfig.getConfig())) {
ProfileSampleConfig.SampleConfigType sampleConfigType =
profileSampleConfig.getSampleConfigType();
if (!nullOrEmpty(sampleConfigType)
&& sampleConfigType.equals(ProfileSampleConfig.SampleConfigType.STATIC)) {
StaticSamplingConfig staticConfig =
JsonUtils.convertValue(profileSampleConfig.getConfig(), StaticSamplingConfig.class);
if (staticConfig.getProfileSampleType() != null
&& staticConfig.getProfileSample() != null) {
EntityUtil.validateProfileSample(
staticConfig.getProfileSampleType().toString(), staticConfig.getProfileSample());
}
}
}
daoCollection

View file

@ -925,13 +925,21 @@ public class IngestionPipelineRepository extends EntityRepository<IngestionPipel
}
public static void validateProfileSample(IngestionPipeline ingestionPipeline) {
JSONObject sourceConfigJson =
new JSONObject(JsonUtils.pojoToJson(ingestionPipeline.getSourceConfig().getConfig()));
String profileSampleType = sourceConfigJson.optString("profileSampleType");
double profileSample = sourceConfigJson.optDouble("profileSample");
EntityUtil.validateProfileSample(profileSampleType, profileSample);
JSONObject profileSampleConfig = sourceConfigJson.optJSONObject("profileSampleConfig");
if (profileSampleConfig == null) {
return;
}
JSONObject config = profileSampleConfig.optJSONObject("config");
if (config == null) {
return;
}
String profileSampleType = config.optString("profileSampleType", "");
double profileSample = config.optDouble("profileSample", Double.NaN);
if (!profileSampleType.isEmpty() && !Double.isNaN(profileSample)) {
EntityUtil.validateProfileSample(profileSampleType, profileSample);
}
}
/**

View file

@ -96,7 +96,9 @@ import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.type.JoinedWith;
import org.openmetadata.schema.type.PipelineObservability;
import org.openmetadata.schema.type.ProfileSampleConfig;
import org.openmetadata.schema.type.Relationship;
import org.openmetadata.schema.type.StaticSamplingConfig;
import org.openmetadata.schema.type.SuggestionType;
import org.openmetadata.schema.type.SystemProfile;
import org.openmetadata.schema.type.TableConstraint;
@ -955,11 +957,20 @@ public class TableRepository extends EntityRepository<Table> {
validateColumn(table, columnProfilerConfig.getColumnName());
}
}
if (tableProfilerConfig.getProfileSampleType() != null
&& tableProfilerConfig.getProfileSample() != null) {
EntityUtil.validateProfileSample(
tableProfilerConfig.getProfileSampleType().toString(),
tableProfilerConfig.getProfileSample());
ProfileSampleConfig profileSampleConfig = tableProfilerConfig.getProfileSampleConfig();
if (!nullOrEmpty(profileSampleConfig) && !nullOrEmpty(profileSampleConfig.getConfig())) {
ProfileSampleConfig.SampleConfigType sampleConfigType =
profileSampleConfig.getSampleConfigType();
if (!nullOrEmpty(sampleConfigType)
&& sampleConfigType.equals(ProfileSampleConfig.SampleConfigType.STATIC)) {
StaticSamplingConfig staticConfig =
JsonUtils.convertValue(profileSampleConfig.getConfig(), StaticSamplingConfig.class);
if (staticConfig.getProfileSampleType() != null
&& staticConfig.getProfileSample() != null) {
EntityUtil.validateProfileSample(
staticConfig.getProfileSampleType().toString(), staticConfig.getProfileSample());
}
}
}
}

View file

@ -151,23 +151,12 @@
"javaType": "org.openmetadata.schema.type.DatabaseProfilerConfig",
"description": "This schema defines the type for Database profile config.",
"properties": {
"profileSample": {
"description": "Percentage of data or no. of rows we want to execute the profiler and tests on",
"type": "number",
"default": null
},
"profileSampleType": {
"$ref": "./table.json#/definitions/profileSampleType"
},
"sampleDataCount": {
"description": "Number of rows of sample data to be generated",
"type": "integer",
"default": 50,
"title": "Sample Data Rows Count"
},
"samplingMethodType": {
"$ref": "./table.json#/definitions/samplingMethodType"
},
"sampleDataStorageConfig": {
"title": "Storage Config for Sample Data",
"$ref": "../services/connections/connectionBasicType.json#/definitions/sampleDataStorageConfig"
@ -175,7 +164,10 @@
"randomizedSample": {
"description": "Whether to randomize the sample data or not.",
"type": "boolean",
"default": false
"default": true
},
"profileSampleConfig": {
"$ref": "../../type/samplingConfig.json#/definitions/profileSampleConfig"
}
}
},

View file

@ -147,23 +147,12 @@
"javaType": "org.openmetadata.schema.type.DatabaseSchemaProfilerConfig",
"description": "This schema defines the type for Schema profile config.",
"properties": {
"profileSample": {
"description": "Percentage of data or no. of rows we want to execute the profiler and tests on",
"type": "number",
"default": null
},
"profileSampleType": {
"$ref": "./table.json#/definitions/profileSampleType"
},
"sampleDataCount": {
"description": "Number of rows of sample data to be generated",
"type": "integer",
"default": 50,
"title": "Sample Data Rows Count"
},
"samplingMethodType": {
"$ref": "./table.json#/definitions/samplingMethodType"
},
"sampleDataStorageConfig": {
"title": "Storage Config for Sample Data",
"$ref": "../services/connections/connectionBasicType.json#/definitions/sampleDataStorageConfig"
@ -171,7 +160,10 @@
"randomizedSample": {
"description": "Whether to randomize the sample data or not.",
"type": "boolean",
"default": false
"default": true
},
"profileSampleConfig": {
"$ref": "../../type/samplingConfig.json#/definitions/profileSampleConfig"
}
}
},

View file

@ -12,21 +12,13 @@
],
"definitions": {
"profileSampleType": {
"description": "Type of Profile Sample (percentage or rows)",
"type": "string",
"enum": [
"PERCENTAGE",
"ROWS"
],
"default": "PERCENTAGE"
"$ref": "../../type/basic.json#/definitions/profileSampleType"
},
"samplingMethodType": {
"description": "Type of Sampling Method (BERNOULLI or SYSTEM)",
"type": "string",
"enum": [
"BERNOULLI",
"SYSTEM"
]
"$ref": "../../type/basic.json#/definitions/samplingMethodType"
},
"profileSampleConfig": {
"$ref": "../../type/samplingConfig.json#/definitions/profileSampleConfig"
},
"tableType": {
"javaType": "org.openmetadata.schema.type.TableType",
@ -839,17 +831,6 @@
"javaType": "org.openmetadata.schema.type.TableProfilerConfig",
"description": "This schema defines the type for Table profile config.",
"properties": {
"profileSampleType": {
"$ref": "#/definitions/profileSampleType"
},
"profileSample": {
"description": "Percentage of data or no. of rows used to compute the profiler metrics and run data quality tests",
"type": "number",
"default": null
},
"samplingMethodType": {
"$ref": "#/definitions/samplingMethodType"
},
"sampleDataCount": {
"description": "Number of sample rows to ingest when 'Generate Sample Data' is enabled",
"type": "integer",
@ -902,6 +883,9 @@
"description": "Table Specific configuration for Profiling it with a Spark Engine. It is ignored for other engines.",
"$ref": "#/definitions/sparkTableProfilerConfig",
"default": null
},
"profileSampleConfig": {
"$ref": "#/definitions/profileSampleConfig"
}
}
},
@ -914,17 +898,6 @@
"description": "Timestamp on which profile is taken.",
"$ref": "../../type/basic.json#/definitions/timestamp"
},
"profileSample": {
"description": "Percentage of data or no. of rows we want to execute the profiler and tests on",
"type": "number",
"default": null
},
"profileSampleType": {
"$ref": "#/definitions/profileSampleType"
},
"samplingMethodType": {
"$ref": "#/definitions/samplingMethodType"
},
"columnCount": {
"description": "Number of columns in the table.",
"type": "number"
@ -949,6 +922,14 @@
"$ref": "#/definitions/customMetricProfile"
},
"default": null
},
"profileSample": {
"description": "Percentage of data or no. of rows we want to execute the profiler and tests on",
"type": "number",
"default": null
},
"profileSampleType": {
"$ref": "#/definitions/profileSampleType"
}
},
"required": [

View file

@ -86,19 +86,8 @@
"default": false,
"title": "Use System Table Statistics"
},
"profileSampleType": {
"$ref": "../entity/data/table.json#/definitions/profileSampleType",
"title": "Profile Sample Type"
},
"profileSample": {
"description": "Percentage of data or no. of rows used to compute the profiler metrics and run data quality tests",
"type": "number",
"default": null,
"title": "Profile Sample"
},
"samplingMethodType": {
"$ref": "../entity/data/table.json#/definitions/samplingMethodType",
"title": "Sampling Method Type"
"profileSampleConfig": {
"$ref": "../type/samplingConfig.json#/definitions/profileSampleConfig"
},
"randomizedSample": {
"description": "Whether to randomize the sample data or not.",

View file

@ -52,11 +52,11 @@
"title": "Profile Sample"
},
"profileSampleType": {
"$ref": "../entity/data/table.json#/definitions/profileSampleType",
"$ref": "../type/basic.json#/definitions/profileSampleType",
"title": "Profile Sample Type"
},
"samplingMethodType": {
"$ref": "../entity/data/table.json#/definitions/samplingMethodType",
"$ref": "../type/basic.json#/definitions/samplingMethodType",
"title": "Sampling Method Type"
},
"testCases": {

View file

@ -297,6 +297,17 @@
"enabled"
],
"additionalProperties": false
},
"profileSampleType": {
"description": "Type of Profile Sample (percentage or rows)",
"type": "string",
"enum": ["PERCENTAGE", "ROWS"],
"default": "PERCENTAGE"
},
"samplingMethodType": {
"description": "Type of Sampling Method (BERNOULLI or SYSTEM)",
"type": "string",
"enum": ["BERNOULLI", "SYSTEM"]
}
}
}

View file

@ -0,0 +1,42 @@
{
"$id": "https://open-metadata.org/schema/type/dynamicSamplingConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "DynamicSamplingConfig",
"javaType": "org.openmetadata.schema.type.DynamicSamplingConfig",
"description": "Configuration for dynamic sampling based on table row count.",
"type": "object",
"properties": {
"thresholds": {
"description": "Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. Tables below the lowest threshold are profiled at 100% (no sampling).",
"type": "array",
"items": {
"type": "object",
"properties": {
"rowCountThreshold": {
"description": "Minimum row count for this tier to apply",
"type": "integer",
"minimum": 1,
"title": "Row Count Threshold"
},
"profileSample": {
"description": "Sample percentage or row count to use for tables at or above this threshold",
"type": "number",
"default": null,
"title": "Profile Sample"
},
"profileSampleType": {
"$ref": "./basic.json#/definitions/profileSampleType",
"title": "Profile Sample Type"
},
"samplingMethodType": {
"$ref": "./basic.json#/definitions/samplingMethodType",
"title": "Sampling Method Type"
}
},
"required": ["rowCountThreshold", "profileSample"],
"additionalProperties": false
}
}
},
"additionalProperties": false
}

View file

@ -0,0 +1,34 @@
{
"$id": "https://open-metadata.org/schema/type/samplingConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "SamplingConfig",
"description": "Sampling configuration types for the profiler.",
"definitions": {
"profileSampleConfig": {
"title": "Profile Sample Config",
"javaType": "org.openmetadata.schema.type.ProfileSampleConfig",
"description": "Profile sample configuration supporting static and dynamic sampling strategies.",
"type": "object",
"properties": {
"sampleConfigType": {
"title": "Sample Config Type",
"description": "Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at runtime based on row count thresholds.",
"type": "string",
"enum": ["STATIC", "DYNAMIC"],
"default": "STATIC"
},
"config": {
"oneOf": [
{
"$ref": "./dynamicSamplingConfig.json"
},
{
"$ref": "./staticSamplingConfig.json"
}
]
}
},
"additionalProperties": false
}
}
}

View file

@ -0,0 +1,25 @@
{
"$id": "https://open-metadata.org/schema/type/staticSamplingConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "StaticSamplingConfig",
"javaType": "org.openmetadata.schema.type.StaticSamplingConfig",
"description": "Configuration for static sampling: a fixed sample size (percentage or number of rows) applied regardless of table row count.",
"type": "object",
"properties": {
"profileSample": {
"description": "Percentage of data or no. of rows used to compute the profiler metrics and run data quality tests",
"type": "number",
"default": null,
"title": "Profile Sample"
},
"profileSampleType": {
"$ref": "./basic.json#/definitions/profileSampleType",
"title": "Profile Sample Type"
},
"samplingMethodType": {
"$ref": "./basic.json#/definitions/samplingMethodType",
"title": "Sampling Method Type"
}
},
"additionalProperties": false
}

View file

@ -331,8 +331,13 @@ test.describe(
JSON.stringify({
excludeColumns: [table.entity?.columns[0].name],
profileQuery: 'select * from table',
profileSample: 60,
profileSampleType: 'PERCENTAGE',
profileSampleConfig: {
sampleConfigType: 'STATIC',
config: {
profileSample: 60,
profileSampleType: 'PERCENTAGE',
},
},
includeColumns: [{ columnName: table.entity?.columns[1].name }],
partitioning: {
partitionColumnName: table.entity?.columns[2].name,
@ -371,8 +376,6 @@ test.describe(
JSON.stringify({
excludeColumns: [table.entity?.columns[0].name],
profileQuery: 'select * from table',
profileSample: null,
profileSampleType: 'PERCENTAGE',
includeColumns: [{ columnName: table.entity?.columns[1].name }],
partitioning: {
partitionColumnName: table.entity?.columns[2].name,
@ -395,7 +398,9 @@ test.describe(
await expect(
page.locator('[data-testid="profile-sample"]')
).toBeVisible();
await expect(page.locator('[data-testid="slider-input"]')).toBeEmpty();
await expect(
page.locator('[data-testid="slider-input"]')
).not.toBeVisible();
await expect(
page.getByTestId('profile-sample').locator('div')
).toBeVisible();

View file

@ -36,8 +36,8 @@ import {
} from '../../utils/entity';
import { test } from '../fixtures/pages';
const domain = new Domain();
const dataProduct = new DataProduct([domain]);
let domain: Domain;
let dataProduct: DataProduct;
const entities = [
ApiEndpointClass,
@ -54,6 +54,9 @@ const entities = [
] as const;
test.beforeAll('setup test', async ({ browser }) => {
domain = new Domain();
dataProduct = new DataProduct([domain]);
const { afterAction, apiContext } = await performAdminLogin(browser);
await domain.create(apiContext);
await dataProduct.create(apiContext);

View file

@ -143,8 +143,11 @@ class MysqlIngestionClass extends ServiceBaseClass {
await page.click('[data-menu-id*="profiler"]');
await page.locator('#root\\/profileSample').waitFor();
await page.fill('#root\\/profileSample', '10');
await page.getByTestId('profile-sample-input').waitFor();
await page
.getByTestId('profile-sample-input')
.locator('input')
.fill('10');
await page.click('[data-testid="submit-btn"]');
// Make sure we create ingestion with None schedule to avoid conflict between Airflow and Argo behavior
await this.scheduleIngestion(page);

View file

@ -110,17 +110,27 @@ const ProfilerSettings: FC<ProfilerSettingsProps> = ({
}
};
const profileSampleType =
profilerConfig?.profileSampleConfig?.config?.profileSampleType ??
ProfileSampleType.Percentage;
const uiSchema = useMemo(
() => ({
'ui:order': ['profileSampleType', '*'],
profileSample: {
'ui:widget':
profilerConfig?.profileSampleType === ProfileSampleType.Percentage
? 'range'
: 'updown',
profileSampleConfig: {
'ui:order': ['sampleConfigType', 'config', '*'],
sampleConfigType: { 'ui:widget': 'hidden' },
config: {
'ui:order': ['profileSampleType', 'profileSample', '*'],
profileSample: {
'ui:widget':
profileSampleType === ProfileSampleType.Percentage
? 'range'
: 'updown',
},
},
},
}),
[profilerConfig]
[profileSampleType]
);
useEffect(() => {

View file

@ -56,6 +56,7 @@ import { CSMode } from '../../../../../enums/codemirror.enum';
import {
PartitionIntervalTypes,
ProfileSampleType,
SampleConfigType,
TableProfilerConfig,
} from '../../../../../generated/entity/data/table';
import {
@ -176,11 +177,13 @@ const ProfilerSettingsModal: React.FC<ProfilerSettingsModalProps> = ({
includeColumns,
partitioning,
profileQuery,
profileSample,
profileSampleType,
excludeColumns,
sampleDataCount,
profileSampleConfig,
} = tableProfilerConfig;
const staticConfig = profileSampleConfig?.config;
const profileSample = staticConfig?.profileSample;
const profileSampleType = staticConfig?.profileSampleType;
handleStateChange({
sqlQuery: profileQuery ?? '',
profileSample: profileSample,
@ -293,17 +296,25 @@ const ProfilerSettingsModal: React.FC<ProfilerSettingsModalProps> = ({
sampleDataCount,
} = data;
const profileSample = profileSampleType
? profileSampleType === ProfileSampleType.Percentage
? profileSamplePercentage
: profileSampleRows
: undefined;
const profileConfig: TableProfilerConfig = {
excludeColumns: excludeCol.length > 0 ? excludeCol : undefined,
profileQuery: !isEmpty(sqlQuery) ? sqlQuery : undefined,
profileSample: profileSampleType
? profileSampleType === ProfileSampleType.Percentage
? profileSamplePercentage
: profileSampleRows
: undefined,
profileSampleType: isUndefined(profileSampleType)
? undefined
: profileSampleType,
profileSampleConfig:
profileSampleType && profileSample
? {
sampleConfigType: SampleConfigType.Static,
config: {
profileSample,
profileSampleType,
},
}
: undefined,
includeColumns: !isEqual(includeCol, DEFAULT_INCLUDE_PROFILE)
? getIncludesColumns()
: undefined,

View file

@ -11,7 +11,7 @@
* limitations under the License.
*/
import Form, { IChangeEvent } from '@rjsf/core';
import { RegistryFieldsType } from '@rjsf/utils';
import { RegistryFieldsType, UiSchema } from '@rjsf/utils';
import { customizeValidator } from '@rjsf/validator-ajv8';
import { Button, Space } from 'antd';
import classNames from 'classnames';
@ -39,6 +39,7 @@ import DescriptionFieldTemplate from '../../../../common/Form/JSONSchema/JSONSch
import { FieldErrorTemplate } from '../../../../common/Form/JSONSchema/JSONSchemaTemplate/FieldErrorTemplate/FieldErrorTemplate';
import { ObjectFieldTemplate } from '../../../../common/Form/JSONSchema/JSONSchemaTemplate/ObjectFieldTemplate';
import WorkflowArrayFieldTemplate from '../../../../common/Form/JSONSchema/JSONSchemaTemplate/WorkflowArrayFieldTemplate';
import ProfileSampleConfigField from './ProfileSampleConfigField';
const IngestionWorkflowForm: FC<IngestionWorkflowFormProps> = ({
pipeLineType,
@ -78,7 +79,7 @@ const IngestionWorkflowForm: FC<IngestionWorkflowFormProps> = ({
serviceData?.connection?.config?.supportsIncrementalMetadataExtraction;
const uiSchema = useMemo(() => {
let commonSchema = { ...INGESTION_WORKFLOW_UI_SCHEMA };
let commonSchema: UiSchema = { ...INGESTION_WORKFLOW_UI_SCHEMA };
if (isElasticSearchPipeline) {
commonSchema = {
...commonSchema,
@ -93,6 +94,15 @@ const IngestionWorkflowForm: FC<IngestionWorkflowFormProps> = ({
};
}
if (pipeLineType === PipelineType.Profiler) {
commonSchema = {
...commonSchema,
profileSampleConfig: {
'ui:field': 'ProfileSampleConfigField',
},
};
}
return commonSchema;
}, [pipeLineType, operationType]);
@ -142,6 +152,10 @@ const IngestionWorkflowForm: FC<IngestionWorkflowFormProps> = ({
fields['/schemas/rootProcessingEngine'] = SparkAgentField;
}
if (pipeLineType === PipelineType.Profiler) {
fields['ProfileSampleConfigField'] = ProfileSampleConfigField;
}
return fields;
}, [pipeLineType]);

View file

@ -0,0 +1,501 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { FieldProps, IdSchema, Registry } from '@rjsf/utils';
import { fireEvent, render, screen } from '@testing-library/react';
import {
ProfileSampleConfig,
ProfileSampleType,
SampleConfigType,
SamplingMethodType,
} from '../../../../../generated/metadataIngestion/databaseServiceProfilerPipeline';
import ProfileSampleConfigField from './ProfileSampleConfigField';
// Icons are irrelevant to these tests, so both of them render nothing.
jest.mock('@untitledui/icons', () => {
  const EmptyIcon = () => null;

  return { Plus: EmptyIcon, Trash01: EmptyIcon };
});

// Plain-DOM stand-ins for the design-system components used by the field.
// Each stand-in forwards only what the tests look at: children, className,
// data-testid and the click / change handlers.
jest.mock('@openmetadata/ui-core-components', () => {
  type WithChildren = { children?: React.ReactNode };
  type ContainerProps = WithChildren & { className?: string };

  // Bare <div> around the children (Card.Content, Grid.Item, Select.Item).
  const makePassthrough = () => (props: WithChildren) =>
    <div>{props.children}</div>;

  // <div> that also forwards className (Card and Grid roots).
  const makeContainer = () => (props: ContainerProps) =>
    <div className={props.className}>{props.children}</div>;

  // Card.Header: title inside a <span>, followed by the `extra` slot.
  function Header(props: {
    title?: React.ReactNode;
    extra?: React.ReactNode;
  }) {
    return (
      <div>
        <span>{props.title}</span>
        {props.extra}
      </div>
    );
  }

  // Select renders an empty <div> carrying the test id; it never calls onChange.
  function SelectRoot(props: { 'data-testid'?: string }) {
    return <div data-testid={props['data-testid']} />;
  }

  function ButtonMock(props: {
    children?: React.ReactNode;
    onClick?: () => void;
    'data-testid'?: string;
    iconLeading?: React.ComponentType;
    color?: string;
    size?: string;
  }) {
    return (
      <button data-testid={props['data-testid']} onClick={props.onClick}>
        {props.children}
      </button>
    );
  }

  // Input reports the raw string value, not the DOM event.
  function InputMock(props: {
    'data-testid'?: string;
    value?: string;
    onChange?: (value: string) => void;
    type?: string;
    className?: string;
  }) {
    return (
      <input
        data-testid={props['data-testid']}
        type={props.type}
        value={props.value}
        onChange={(event) => props.onChange?.(event.target.value)}
      />
    );
  }

  function TypographyMock(props: {
    children?: React.ReactNode;
    size?: string;
    weight?: string;
    className?: string;
    as?: React.ElementType;
  }) {
    return <span className={props.className}>{props.children}</span>;
  }

  return {
    Button: ButtonMock,
    Card: Object.assign(makeContainer(), {
      Content: makePassthrough(),
      Header,
    }),
    Grid: Object.assign(makeContainer(), { Item: makePassthrough() }),
    Input: InputMock,
    Select: Object.assign(SelectRoot, { Item: makePassthrough() }),
    Typography: TypographyMock,
  };
});
// Spy passed to the field as `onChange`; the interaction tests assert on its calls.
const mockOnChange = jest.fn();

// Minimal rjsf field props. Tests override `formData` to select the mode under test.
const baseFieldProps: FieldProps<ProfileSampleConfig> = {
  // Identity
  id: 'root/profileSampleConfig',
  name: 'profileSampleConfig',
  idSchema: { $id: 'root/profileSampleConfig' } as IdSchema,
  idSeparator: '/',

  // Schema wiring
  schema: { type: 'object', title: 'Profile Sample Config' },
  uiSchema: {},
  registry: {} as Registry,

  // State
  formData: undefined,
  formContext: {},
  rawErrors: undefined,
  hideError: undefined,
  autofocus: false,
  disabled: false,
  readonly: false,
  required: false,

  // Handlers
  onChange: mockOnChange,
  onBlur: jest.fn(),
  onFocus: jest.fn(),
};

// Sample type / sampling method pair shared by both fixtures below.
const percentageBernoulli = {
  profileSampleType: ProfileSampleType.Percentage,
  samplingMethodType: SamplingMethodType.Bernoulli,
};

// STATIC fixture: a fixed 80 PERCENTAGE sample.
const staticFormData: ProfileSampleConfig = {
  sampleConfigType: SampleConfigType.Static,
  config: { profileSample: 80, ...percentageBernoulli },
};

// DYNAMIC fixture: a single threshold at 1,000,000 rows.
const dynamicFormData: ProfileSampleConfig = {
  sampleConfigType: SampleConfigType.Dynamic,
  config: {
    thresholds: [
      {
        rowCountThreshold: 1000000,
        profileSample: 10,
        ...percentageBernoulli,
      },
    ],
  },
};
describe('ProfileSampleConfigField', () => {
  // DYNAMIC fixture with no thresholds yet.
  const emptyDynamic: ProfileSampleConfig = {
    sampleConfigType: SampleConfigType.Dynamic,
    config: { thresholds: [] },
  };

  // DYNAMIC fixture with two thresholds that carry only the required fields.
  const multiThresholdData: ProfileSampleConfig = {
    sampleConfigType: SampleConfigType.Dynamic,
    config: {
      thresholds: [
        { rowCountThreshold: 1000000, profileSample: 10 },
        { rowCountThreshold: 500000, profileSample: 20 },
      ],
    },
  };

  const STATIC_FIELD_TEST_IDS = [
    'profile-sample-input',
    'profile-sample-type-select',
    'sampling-method-type-select',
  ];

  // Renders the field with the base props; omit `formData` to test the defaults.
  const renderField = (formData?: ProfileSampleConfig) =>
    render(
      <ProfileSampleConfigField {...baseFieldProps} formData={formData} />
    );

  const clickByTestId = (testId: string) =>
    fireEvent.click(screen.getByTestId(testId));

  beforeEach(() => {
    jest.clearAllMocks();
  });

  describe('Default (STATIC) rendering', () => {
    it('renders the sample-config-type selector', () => {
      renderField();

      expect(
        screen.getByTestId('sample-config-type-select')
      ).toBeInTheDocument();
      expect(screen.getByText('label.sample-config-type')).toBeInTheDocument();
    });

    it('shows static config fields when sampleConfigType is STATIC', () => {
      renderField(staticFormData);

      for (const testId of STATIC_FIELD_TEST_IDS) {
        expect(screen.getByTestId(testId)).toBeInTheDocument();
      }
    });

    it('shows static config fields by default when no formData is provided', () => {
      renderField();

      for (const testId of STATIC_FIELD_TEST_IDS) {
        expect(screen.getByTestId(testId)).toBeInTheDocument();
      }
    });

    it('does not show dynamic threshold section in STATIC mode', () => {
      renderField(staticFormData);

      expect(screen.queryByTestId('add-threshold-btn')).not.toBeInTheDocument();
      expect(
        screen.queryByText('label.threshold-plural')
      ).not.toBeInTheDocument();
    });

    it('displays the profile-sample input', () => {
      renderField(staticFormData);

      expect(screen.getByTestId('profile-sample-input')).toBeInTheDocument();
    });
  });

  describe('DYNAMIC mode rendering', () => {
    it('shows the thresholds section when sampleConfigType is DYNAMIC', () => {
      renderField(dynamicFormData);

      expect(screen.getByText('label.threshold-plural')).toBeInTheDocument();
      expect(screen.getByTestId('add-threshold-btn')).toBeInTheDocument();
    });

    it('does not show static config fields in DYNAMIC mode', () => {
      renderField(dynamicFormData);

      for (const testId of STATIC_FIELD_TEST_IDS) {
        expect(screen.queryByTestId(testId)).not.toBeInTheDocument();
      }
    });

    it('renders a threshold card for each threshold in formData', () => {
      renderField(dynamicFormData);

      expect(screen.getByText('label.threshold 1')).toBeInTheDocument();

      for (const testId of [
        'row-count-threshold-0',
        'profile-sample-0',
        'profile-sample-type-0',
        'sampling-method-type-0',
      ]) {
        expect(screen.getByTestId(testId)).toBeInTheDocument();
      }
    });

    it('renders multiple threshold cards when multiple thresholds exist', () => {
      renderField(multiThresholdData);

      for (const title of ['label.threshold 1', 'label.threshold 2']) {
        expect(screen.getByText(title)).toBeInTheDocument();
      }
      for (const testId of ['row-count-threshold-0', 'row-count-threshold-1']) {
        expect(screen.getByTestId(testId)).toBeInTheDocument();
      }
    });

    it('renders the remove button for each threshold', () => {
      renderField(dynamicFormData);

      expect(screen.getByTestId('remove-threshold-0')).toBeInTheDocument();
    });

    it('shows empty threshold list with only the add button when thresholds array is empty', () => {
      renderField(emptyDynamic);

      expect(
        screen.queryByTestId('row-count-threshold-0')
      ).not.toBeInTheDocument();
      expect(screen.getByTestId('add-threshold-btn')).toBeInTheDocument();
    });
  });

  describe('Add threshold interaction', () => {
    it('calls onChange with a new default threshold when add button is clicked', () => {
      renderField(emptyDynamic);
      clickByTestId('add-threshold-btn');

      expect(mockOnChange).toHaveBeenCalledWith({
        sampleConfigType: SampleConfigType.Dynamic,
        config: {
          thresholds: [{ rowCountThreshold: 1, profileSample: 100 }],
        },
      });
    });

    it('appends a new threshold to existing thresholds when add is clicked', () => {
      renderField(dynamicFormData);
      clickByTestId('add-threshold-btn');

      expect(mockOnChange).toHaveBeenCalledWith({
        sampleConfigType: SampleConfigType.Dynamic,
        config: {
          thresholds: [
            {
              rowCountThreshold: 1000000,
              profileSample: 10,
              profileSampleType: ProfileSampleType.Percentage,
              samplingMethodType: SamplingMethodType.Bernoulli,
            },
            { rowCountThreshold: 1, profileSample: 100 },
          ],
        },
      });
    });
  });

  describe('Remove threshold interaction', () => {
    it('calls onChange with the threshold removed when remove button is clicked', () => {
      renderField(dynamicFormData);
      clickByTestId('remove-threshold-0');

      expect(mockOnChange).toHaveBeenCalledWith({
        sampleConfigType: SampleConfigType.Dynamic,
        config: { thresholds: [] },
      });
    });

    it('removes the correct threshold when one of many is deleted', () => {
      renderField(multiThresholdData);
      clickByTestId('remove-threshold-0');

      expect(mockOnChange).toHaveBeenCalledWith({
        sampleConfigType: SampleConfigType.Dynamic,
        config: {
          thresholds: [{ rowCountThreshold: 500000, profileSample: 20 }],
        },
      });
    });
  });

  describe('Config type rendering', () => {
    it('shows static fields when formData has STATIC type', () => {
      renderField(staticFormData);

      expect(screen.getByTestId('profile-sample-input')).toBeInTheDocument();
      expect(screen.queryByTestId('add-threshold-btn')).not.toBeInTheDocument();
    });

    it('shows dynamic fields when formData has DYNAMIC type', () => {
      renderField(dynamicFormData);

      expect(screen.getByTestId('add-threshold-btn')).toBeInTheDocument();
      expect(
        screen.queryByTestId('profile-sample-input')
      ).not.toBeInTheDocument();
    });
  });

  describe('Label rendering', () => {
    it('renders all field labels in STATIC mode', () => {
      renderField(staticFormData);

      for (const label of [
        'label.profile-sample',
        'label.profile-sample-type',
        'label.sampling-method-type',
      ]) {
        expect(screen.getByText(label)).toBeInTheDocument();
      }
    });

    it('renders all field labels in DYNAMIC threshold card', () => {
      renderField(dynamicFormData);

      for (const label of [
        'label.row-count-threshold',
        'label.profile-sample',
        'label.profile-sample-type',
        'label.sampling-method-type',
      ]) {
        expect(screen.getByText(label)).toBeInTheDocument();
      }
    });
  });
});

View file

@ -0,0 +1,340 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
Button,
Card,
Grid,
Input,
Select,
Typography,
} from '@openmetadata/ui-core-components';
import { FieldProps } from '@rjsf/utils';
import { Plus, Trash01 } from '@untitledui/icons';
import { useCallback } from 'react';
import { useTranslation } from 'react-i18next';
import {
ICSamplingConfig,
ProfileSampleConfig,
ProfileSampleType,
SampleConfigType,
SamplingMethodType,
Threshold,
} from '../../../../../generated/metadataIngestion/databaseServiceProfilerPipeline';
/**
 * Turns `[id, label]` pairs into the `{ id, label }` items handed to the
 * `Select` dropdowns of this field.
 */
const toSelectItems = <T extends string>(pairs: [T, string][]) =>
  pairs.map(([id, label]) => ({ id, label }));

// Choices for the sampling strategy dropdown.
const SAMPLE_CONFIG_TYPE_OPTIONS = toSelectItems<SampleConfigType>([
  [SampleConfigType.Static, 'STATIC'],
  [SampleConfigType.Dynamic, 'DYNAMIC'],
]);

// Choices for how `profileSample` is interpreted.
const PROFILE_SAMPLE_TYPE_OPTIONS = toSelectItems<ProfileSampleType>([
  [ProfileSampleType.Percentage, 'PERCENTAGE'],
  [ProfileSampleType.Rows, 'ROWS'],
]);

// Choices for the sampling method dropdown.
const SAMPLING_METHOD_TYPE_OPTIONS = toSelectItems<SamplingMethodType>([
  [SamplingMethodType.Bernoulli, 'BERNOULLI'],
  [SamplingMethodType.System, 'SYSTEM'],
]);

// Values given to a threshold created through the "add threshold" button.
const DEFAULT_THRESHOLD: Threshold = {
  rowCountThreshold: 1,
  profileSample: 100,
};
/**
 * Type guard for the value reported by the sampling-strategy `Select`, whose
 * handler is typed as `string | number | null`.
 */
const isSampleConfigType = (value: unknown): value is SampleConfigType =>
  value === SampleConfigType.Static || value === SampleConfigType.Dynamic;

/**
 * Coerces the raw text of the row-count input into a value the schema accepts.
 * `rowCountThreshold` is declared as an integer with a minimum of 1, so empty,
 * non-numeric and non-positive input falls back to 1 and fractions are
 * truncated.
 */
const toRowCountThreshold = (value: string): number => {
  const parsed = Math.trunc(Number(value));

  return Number.isFinite(parsed) && parsed >= 1 ? parsed : 1;
};

/**
 * Custom rjsf field for `profileSampleConfig`.
 *
 * Renders a strategy selector (STATIC / DYNAMIC) followed by either the static
 * sampling inputs or an editable list of row-count thresholds. Every edit is
 * reported through `props.onChange` as a complete `ProfileSampleConfig` value.
 *
 * @param props rjsf field props; only `formData` and `onChange` are read.
 */
const ProfileSampleConfigField = (props: FieldProps<ProfileSampleConfig>) => {
  const { formData, onChange } = props;
  const { t } = useTranslation();

  // Missing form data is treated as STATIC with an empty config.
  const sampleConfigType =
    formData?.sampleConfigType ?? SampleConfigType.Static;
  const config: ICSamplingConfig = formData?.config ?? {};

  // Switching strategy starts from a fresh config of the new shape.
  const handleConfigTypeChange = useCallback(
    (type: string | number | null) => {
      // Ignore a cleared or unknown selection so an invalid type never reaches
      // the form data, and ignore a re-selection of the current type so the
      // values already entered are not wiped.
      if (!isSampleConfigType(type) || type === sampleConfigType) {
        return;
      }

      const newConfig: ICSamplingConfig =
        type === SampleConfigType.Dynamic ? { thresholds: [] } : {};
      onChange({
        sampleConfigType: type,
        config: newConfig,
      });
    },
    [sampleConfigType, onChange]
  );

  // Updates a single key of the static config, keeping the others.
  const handleStaticFieldChange = useCallback(
    (
      field: keyof ICSamplingConfig,
      value: ICSamplingConfig[keyof ICSamplingConfig]
    ) => {
      onChange({ sampleConfigType, config: { ...config, [field]: value } });
    },
    [sampleConfigType, config, onChange]
  );

  // Updates a single key of the threshold at `index`; the list is copied first.
  const handleThresholdChange = useCallback(
    (
      index: number,
      field: keyof Threshold,
      value: Threshold[keyof Threshold]
    ) => {
      const thresholds = [...(config.thresholds ?? [])];
      thresholds[index] = { ...thresholds[index], [field]: value };
      onChange({ sampleConfigType, config: { thresholds } });
    },
    [sampleConfigType, config, onChange]
  );

  // Appends a copy of DEFAULT_THRESHOLD to the list.
  const handleAddThreshold = useCallback(() => {
    const thresholds = [...(config.thresholds ?? []), { ...DEFAULT_THRESHOLD }];
    onChange({ sampleConfigType, config: { thresholds } });
  }, [sampleConfigType, config, onChange]);

  // Drops the threshold at `index`.
  const handleRemoveThreshold = useCallback(
    (index: number) => {
      const thresholds = (config.thresholds ?? []).filter(
        (_, i) => i !== index
      );
      onChange({ sampleConfigType, config: { thresholds } });
    },
    [sampleConfigType, config, onChange]
  );

  return (
    <div className="profile-sample-config-field">
      <div className="tw:flex tw:flex-col tw:gap-2">
        <Typography size="text-sm">{t('label.sample-config-type')}</Typography>
        <Select
          className="w-full"
          data-testid="sample-config-type-select"
          fontSize="sm"
          items={SAMPLE_CONFIG_TYPE_OPTIONS}
          value={sampleConfigType}
          onChange={handleConfigTypeChange}>
          {(item) => (
            <Select.Item id={item.id} key={item.id}>
              <Typography size="text-sm">{item.label}</Typography>
            </Select.Item>
          )}
        </Select>
      </div>

      {sampleConfigType === SampleConfigType.Static && (
        <Grid className="m-t-sm" colGap="4" rowGap="2">
          <Grid.Item span={12}>
            <div className="tw:flex tw:flex-col tw:gap-1">
              <Typography size="text-sm">
                {t('label.profile-sample')}
              </Typography>
              <Input
                className="w-full"
                data-testid="profile-sample-input"
                type="number"
                value={config.profileSample?.toString() ?? ''}
                onChange={(value) =>
                  handleStaticFieldChange(
                    'profileSample',
                    // An emptied input unsets the sample instead of storing 0.
                    value !== '' ? Number(value) : undefined
                  )
                }
              />
            </div>
          </Grid.Item>
          <Grid.Item span={12}>
            <div className="tw:flex tw:flex-col tw:gap-1">
              <Typography size="text-sm">
                {t('label.profile-sample-type')}
              </Typography>
              <Select
                className="w-full"
                data-testid="profile-sample-type-select"
                fontSize="sm"
                items={PROFILE_SAMPLE_TYPE_OPTIONS}
                value={config.profileSampleType ?? null}
                onChange={(value) =>
                  handleStaticFieldChange(
                    'profileSampleType',
                    value as ProfileSampleType
                  )
                }>
                {(item) => (
                  <Select.Item id={item.id} key={item.id}>
                    <Typography size="text-sm">{item.label}</Typography>
                  </Select.Item>
                )}
              </Select>
            </div>
          </Grid.Item>
          <Grid.Item span={12}>
            <div className="tw:flex tw:flex-col tw:gap-1">
              <Typography size="text-sm">
                {t('label.sampling-method-type')}
              </Typography>
              <Select
                className="w-full"
                data-testid="sampling-method-type-select"
                fontSize="sm"
                items={SAMPLING_METHOD_TYPE_OPTIONS}
                value={config.samplingMethodType ?? null}
                onChange={(value) =>
                  handleStaticFieldChange(
                    'samplingMethodType',
                    value as SamplingMethodType
                  )
                }>
                {(item) => (
                  <Select.Item id={item.id} key={item.id}>
                    <Typography size="text-sm">{item.label}</Typography>
                  </Select.Item>
                )}
              </Select>
            </div>
          </Grid.Item>
        </Grid>
      )}

      {sampleConfigType === SampleConfigType.Dynamic && (
        <div className="m-t-sm">
          <Typography
            className="m-b-xs tw:block"
            size="text-sm"
            weight="medium">
            {t('label.threshold-plural')}
          </Typography>

          {(config.thresholds ?? []).map((threshold, index) => (
            <Card className="m-b-sm" key={index} size="sm">
              <Card.Header
                extra={
                  <Button
                    color="tertiary-destructive"
                    data-testid={`remove-threshold-${index}`}
                    iconLeading={Trash01}
                    size="sm"
                    onClick={() => handleRemoveThreshold(index)}
                  />
                }
                title={`${t('label.threshold')} ${index + 1}`}
              />
              <Card.Content>
                <Grid colGap="4" rowGap="2">
                  <Grid.Item span={12}>
                    <div className="tw:flex tw:flex-col tw:gap-1">
                      <Typography size="text-sm">
                        {t('label.row-count-threshold')}
                      </Typography>
                      <Input
                        className="w-full"
                        data-testid={`row-count-threshold-${index}`}
                        type="number"
                        value={threshold.rowCountThreshold.toString()}
                        onChange={(value) =>
                          handleThresholdChange(
                            index,
                            'rowCountThreshold',
                            toRowCountThreshold(value)
                          )
                        }
                      />
                    </div>
                  </Grid.Item>
                  <Grid.Item span={12}>
                    <div className="tw:flex tw:flex-col tw:gap-1">
                      <Typography size="text-sm">
                        {t('label.profile-sample')}
                      </Typography>
                      <Input
                        className="w-full"
                        data-testid={`profile-sample-${index}`}
                        type="number"
                        value={threshold.profileSample.toString()}
                        onChange={(value) =>
                          handleThresholdChange(
                            index,
                            'profileSample',
                            // Empty or non-numeric input is stored as 0.
                            Number(value) || 0
                          )
                        }
                      />
                    </div>
                  </Grid.Item>
                  <Grid.Item span={12}>
                    <div className="tw:flex tw:flex-col tw:gap-1">
                      <Typography size="text-sm">
                        {t('label.profile-sample-type')}
                      </Typography>
                      <Select
                        className="w-full"
                        data-testid={`profile-sample-type-${index}`}
                        fontSize="sm"
                        items={PROFILE_SAMPLE_TYPE_OPTIONS}
                        value={threshold.profileSampleType ?? null}
                        onChange={(value) =>
                          handleThresholdChange(
                            index,
                            'profileSampleType',
                            value as ProfileSampleType
                          )
                        }>
                        {(item) => (
                          <Select.Item id={item.id} key={item.id}>
                            <Typography size="text-sm">{item.label}</Typography>
                          </Select.Item>
                        )}
                      </Select>
                    </div>
                  </Grid.Item>
                  <Grid.Item span={12}>
                    <div className="tw:flex tw:flex-col tw:gap-1">
                      <Typography size="text-sm">
                        {t('label.sampling-method-type')}
                      </Typography>
                      <Select
                        className="w-full"
                        data-testid={`sampling-method-type-${index}`}
                        fontSize="sm"
                        items={SAMPLING_METHOD_TYPE_OPTIONS}
                        value={threshold.samplingMethodType ?? null}
                        onChange={(value) =>
                          handleThresholdChange(
                            index,
                            'samplingMethodType',
                            value as SamplingMethodType
                          )
                        }>
                        {(item) => (
                          <Select.Item id={item.id} key={item.id}>
                            <Typography size="text-sm">{item.label}</Typography>
                          </Select.Item>
                        )}
                      </Select>
                    </div>
                  </Grid.Item>
                </Grid>
              </Card.Content>
            </Card>
          ))}

          <Button
            color="secondary"
            data-testid="add-threshold-btn"
            iconLeading={Plus}
            size="sm"
            onClick={handleAddThreshold}>
            {t('label.add-entity', { entity: t('label.threshold') })}
          </Button>
        </div>
      )}
    </div>
  );
};
export default ProfileSampleConfigField;

View file

@ -494,7 +494,7 @@ export const ADVANCED_PROPERTIES = [
'includeViews',
'useStatistics',
'confidence',
'samplingMethodType',
'profileSampleConfig',
'randomizedSample',
'sampleDataCount',
'threadCount',

View file

@ -990,13 +990,8 @@ export interface TableProfilerConfig {
/**
* Users' raw SQL query to fetch sample data and profile the table
*/
profileQuery?: string;
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
profileQuery?: string;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
@ -1004,8 +999,7 @@ export interface TableProfilerConfig {
/**
* Number of sample rows to ingest when 'Generate Sample Data' is enabled
*/
sampleDataCount?: number;
samplingMethodType?: SamplingMethodType;
sampleDataCount?: number;
/**
* Table Specific configuration for Profiling it with a Spark Engine. It is ignored for
* other engines.
@ -1077,6 +1071,38 @@ export enum PartitionIntervalUnit {
Year = "YEAR",
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
/**
* Configuration for dynamic sampling based on table row count.
*
* Configuration for static sampling based on table row count.
*/
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*/
@ -1093,6 +1119,28 @@ export enum SamplingMethodType {
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Table Specific configuration for Profiling it with a Spark Engine. It is ignored for
* other engines.

View file

@ -57,8 +57,7 @@ export interface Profile {
/**
* No.of rows in the table. This is always executed on the whole table.
*/
rowCount?: number;
samplingMethodType?: SamplingMethodType;
rowCount?: number;
/**
* Table size in GB
*/
@ -258,14 +257,6 @@ export enum ProfileSampleType {
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
/**
* type of profile
*

View file

@ -976,13 +976,8 @@ export interface TableProfilerConfig {
/**
* Users' raw SQL query to fetch sample data and profile the table
*/
profileQuery?: string;
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
profileQuery?: string;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
@ -990,8 +985,7 @@ export interface TableProfilerConfig {
/**
* Number of sample rows to ingest when 'Generate Sample Data' is enabled
*/
sampleDataCount?: number;
samplingMethodType?: SamplingMethodType;
sampleDataCount?: number;
/**
* Table Specific configuration for Profiling it with a Spark Engine. It is ignored for
* other engines.
@ -1063,6 +1057,38 @@ export enum PartitionIntervalUnit {
Year = "YEAR",
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
/**
* Configuration for dynamic sampling based on table row count.
*
* Configuration for static sampling based on table row count.
*/
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*/
@ -1079,6 +1105,28 @@ export enum SamplingMethodType {
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Table Specific configuration for Profiling it with a Spark Engine. It is ignored for
* other engines.

View file

@ -259,8 +259,7 @@ export interface TableProfile {
/**
* No.of rows in the table. This is always executed on the whole table.
*/
rowCount?: number;
samplingMethodType?: SamplingMethodType;
rowCount?: number;
/**
* Table size in GB
*/
@ -278,11 +277,3 @@ export enum ProfileSampleType {
Percentage = "PERCENTAGE",
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}

View file

@ -572,21 +572,13 @@ export interface Pipeline {
/**
* List of metrics to compute. If empty, then all metrics will be computed
*/
metrics?: MetricType[];
processingEngine?: ProcessingEngine;
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*
* Percentage of data or no. of rows we want to execute the profiler and tests on
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
metrics?: MetricType[];
processingEngine?: ProcessingEngine;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
randomizedSample?: boolean;
samplingMethodType?: SamplingMethodType;
randomizedSample?: boolean;
/**
* Number of threads to use during metric computations
*/
@ -756,6 +748,12 @@ export interface Pipeline {
* Fully qualified name of the entity to be tested, if we're working with a basic suite.
*/
entityFullyQualifiedName?: string;
/**
* Percentage of data or no. of rows we want to execute the profiler and tests on
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
/**
* Service connections to be used for the logical test suite.
*/
@ -2717,6 +2715,38 @@ export enum ProcessingEngineType {
Spark = "Spark",
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
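/**
* Sampling settings; which fields apply depends on the sampling type.
*
* Dynamic sampling: `thresholds` selects the sample from the table row count.
*
* Static sampling: `profileSample`, `profileSampleType` and `samplingMethodType` describe
* one fixed sample.
*/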
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*/
@ -2725,6 +2755,36 @@ export enum ProfileSampleType {
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Configuration for SQL query parser selection for lineage extraction.
*
@ -2761,14 +2821,6 @@ export enum QueryParserType {
SQLGlot = "SqlGlot",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
/**
* Service connections available for the logical test suite.
*/

View file

@ -503,11 +503,7 @@ export interface EntityReference {
* This schema defines the type for Database profile config.
*/
export interface DatabaseProfilerConfig {
/**
* Percentage of data or no. of rows we want to execute the profiler and tests on
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
@ -517,10 +513,41 @@ export interface DatabaseProfilerConfig {
*/
sampleDataCount?: number;
sampleDataStorageConfig?: SampleDataStorageConfig;
samplingMethodType?: SamplingMethodType;
[property: string]: any;
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
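/**
* Sampling settings; which fields apply depends on the sampling type.
*
* Dynamic sampling: `thresholds` selects the sample from the table row count.
*
* Static sampling: `profileSample`, `profileSampleType` and `samplingMethodType` describe
* one fixed sample.
*/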
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*/
@ -529,6 +556,36 @@ export enum ProfileSampleType {
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Storage config to store sample data
*/
@ -613,14 +670,6 @@ export interface AwsCredentials {
profileName?: string;
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
/**
* Status of the Database.
*

View file

@ -499,11 +499,7 @@ export interface EntityReference {
* This schema defines the type for Schema profile config.
*/
export interface DatabaseSchemaProfilerConfig {
/**
* Percentage of data or no. of rows we want to execute the profiler and tests on
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
@ -513,10 +509,41 @@ export interface DatabaseSchemaProfilerConfig {
*/
sampleDataCount?: number;
sampleDataStorageConfig?: SampleDataStorageConfig;
samplingMethodType?: SamplingMethodType;
[property: string]: any;
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
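/**
* Sampling settings; which fields apply depends on the sampling type.
*
* Dynamic sampling: `thresholds` selects the sample from the table row count.
*
* Static sampling: `profileSample`, `profileSampleType` and `samplingMethodType` describe
* one fixed sample.
*/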
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*/
@ -525,6 +552,36 @@ export enum ProfileSampleType {
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Storage config to store sample data
*/
@ -609,14 +666,6 @@ export interface AwsCredentials {
profileName?: string;
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
/**
* Status of the DatabaseSchema.
*

View file

@ -1309,8 +1309,7 @@ export interface TableProfile {
/**
* No.of rows in the table. This is always executed on the whole table.
*/
rowCount?: number;
samplingMethodType?: SamplingMethodType;
rowCount?: number;
/**
* Table size in GB
*/
@ -1329,14 +1328,6 @@ export enum ProfileSampleType {
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
/**
* Sample data for a table.
*
@ -1518,13 +1509,8 @@ export interface TableProfilerConfig {
/**
* Users' raw SQL query to fetch sample data and profile the table
*/
profileQuery?: string;
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
profileQuery?: string;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
@ -1532,8 +1518,7 @@ export interface TableProfilerConfig {
/**
* Number of sample rows to ingest when 'Generate Sample Data' is enabled
*/
sampleDataCount?: number;
samplingMethodType?: SamplingMethodType;
sampleDataCount?: number;
/**
* Table Specific configuration for Profiling it with a Spark Engine. It is ignored for
* other engines.
@ -1605,6 +1590,68 @@ export enum PartitionIntervalUnit {
Year = "YEAR",
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
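/**
* Sampling settings; which fields apply depends on the sampling type.
*
* Dynamic sampling: `thresholds` selects the sample from the table row count.
*
* Static sampling: `profileSample`, `profileSampleType` and `samplingMethodType` describe
* one fixed sample.
*/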
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Table Specific configuration for Profiling it with a Spark Engine. It is ignored for
* other engines.

View file

@ -1255,21 +1255,13 @@ export interface Pipeline {
/**
* List of metrics to compute. If empty, then all metrics will be computed
*/
metrics?: MetricType[];
processingEngine?: ProcessingEngine;
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*
* Percentage of data or no. of rows we want to execute the profiler and tests on
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
metrics?: MetricType[];
processingEngine?: ProcessingEngine;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
randomizedSample?: boolean;
samplingMethodType?: SamplingMethodType;
randomizedSample?: boolean;
/**
* Number of threads to use during metric computations
*/
@ -1439,6 +1431,12 @@ export interface Pipeline {
* Fully qualified name of the entity to be tested, if we're working with a basic suite.
*/
entityFullyQualifiedName?: string;
/**
* Percentage of data or no. of rows we want to execute the profiler and tests on
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
/**
* Service connections to be used for the logical test suite.
*/
@ -3301,6 +3299,38 @@ export enum ProcessingEngineType {
Spark = "Spark",
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
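/**
* Sampling settings; which fields apply depends on the sampling type.
*
* Dynamic sampling: `thresholds` selects the sample from the table row count.
*
* Static sampling: `profileSample`, `profileSampleType` and `samplingMethodType` describe
* one fixed sample.
*/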
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*/
@ -3309,6 +3339,36 @@ export enum ProfileSampleType {
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Configuration for SQL query parser selection for lineage extraction.
*
@ -3345,14 +3405,6 @@ export enum QueryParserType {
SQLGlot = "SqlGlot",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
/**
* Service connections available for the logical test suite.
*/

View file

@ -40,19 +40,13 @@ export interface DatabaseServiceProfilerPipeline {
/**
* List of metrics to compute. If empty, then all metrics will be computed
*/
metrics?: MetricType[];
processingEngine?: ProcessingEngine;
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
metrics?: MetricType[];
processingEngine?: ProcessingEngine;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
randomizedSample?: boolean;
samplingMethodType?: SamplingMethodType;
randomizedSample?: boolean;
/**
* Regex to only fetch tables or databases that matches the pattern.
*/
@ -193,6 +187,38 @@ export enum Type {
Spark = "Spark",
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
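/**
* Sampling settings; which fields apply depends on the sampling type.
*
* Dynamic sampling: `thresholds` selects the sample from the table row count.
*
* Static sampling: `profileSample`, `profileSampleType` and `samplingMethodType` describe
* one fixed sample.
*/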
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*/
@ -209,6 +235,28 @@ export enum SamplingMethodType {
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Pipeline type
*

View file

@ -5439,21 +5439,13 @@ export interface Pipeline {
/**
* List of metrics to compute. If empty, then all metrics will be computed
*/
metrics?: MetricType[];
processingEngine?: ProcessingEngine;
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*
* Percentage of data or no. of rows we want to execute the profiler and tests on
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
metrics?: MetricType[];
processingEngine?: ProcessingEngine;
profileSampleConfig?: ProfileSampleConfig;
/**
* Whether to randomize the sample data or not.
*/
randomizedSample?: boolean;
samplingMethodType?: SamplingMethodType;
randomizedSample?: boolean;
/**
* Number of threads to use during metric computations
*/
@ -5623,6 +5615,12 @@ export interface Pipeline {
* Fully qualified name of the entity to be tested, if we're working with a basic suite.
*/
entityFullyQualifiedName?: string;
/**
* Percentage of data or no. of rows we want to execute the profiler and tests on
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
/**
* Service connections to be used for the logical test suite.
*/
@ -7312,6 +7310,38 @@ export enum ProcessingEngineType {
Spark = "Spark",
}
/**
* Profile sample configuration supporting static and dynamic sampling strategies.
*/
export interface ProfileSampleConfig {
config?: ICSamplingConfig;
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
sampleConfigType?: SampleConfigType;
}
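/**
* Sampling settings; which fields apply depends on the sampling type.
*
* Dynamic sampling: `thresholds` selects the sample from the table row count.
*
* Static sampling: `profileSample`, `profileSampleType` and `samplingMethodType` describe
* one fixed sample.
*/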
export interface ICSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
profileSampleType?: ProfileSampleType;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*/
@ -7320,6 +7350,36 @@ export enum ProfileSampleType {
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
samplingMethodType?: SamplingMethodType;
}
/**
* Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at
* runtime based on row count thresholds.
*/
export enum SampleConfigType {
Dynamic = "DYNAMIC",
Static = "STATIC",
}
/**
* Configuration for SQL query parser selection for lineage extraction.
*
@ -7356,14 +7416,6 @@ export enum QueryParserType {
SQLGlot = "SqlGlot",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
/**
* Service connections available for the logical test suite.
*/

View file

@ -0,0 +1,51 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Configuration for dynamic sampling based on table row count.
*
* The sample to use is selected through the `thresholds` list; each entry pairs a minimum
* row count with the sample to apply at or above it.
*/
export interface DynamicSamplingConfig {
/**
* Row count thresholds for sampling. Evaluated in order from highest to lowest threshold.
* Tables below the lowest threshold are profiled at 100% (no sampling).
*/
thresholds?: Threshold[];
}
/**
* One tier of a dynamic sampling configuration: a minimum row count together with the
* sample to use for tables at or above it.
*/
export interface Threshold {
/**
* Sample percentage or row count to use for tables at or above this threshold
*/
profileSample: number;
/**
* Type of Profile Sample (percentage or rows) that profileSample is expressed in
*/
profileSampleType?: ProfileSampleType;
/**
* Minimum row count for this tier to apply
*/
rowCountThreshold: number;
/**
* Type of Sampling Method (BERNOULLI or SYSTEM) to use for this tier
*/
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*
* Used next to a `profileSample` number to say whether that number is a percentage of the
* data or a number of rows.
*/
export enum ProfileSampleType {
Percentage = "PERCENTAGE",
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*
* NOTE(review): presumably these map to the SQL TABLESAMPLE methods of the same names;
* confirm against the profiler implementation.
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}

View file

@ -117,8 +117,7 @@ export interface Profile {
/**
* No.of rows in the table. This is always executed on the whole table.
*/
rowCount?: number;
samplingMethodType?: SamplingMethodType;
rowCount?: number;
/**
* Table size in GB
*/
@ -318,14 +317,6 @@ export enum ProfileSampleType {
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}
/**
* type of profile
*

View file

@ -0,0 +1,40 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Configuration for static sampling: one fixed sample, expressed as a percentage of the
* data or as a number of rows.
*/
export interface StaticSamplingConfig {
/**
* Percentage of data or no. of rows used to compute the profiler metrics and run data
* quality tests
*/
profileSample?: number;
/**
* Type of Profile Sample (percentage or rows) that profileSample is expressed in
*/
profileSampleType?: ProfileSampleType;
/**
* Type of Sampling Method (BERNOULLI or SYSTEM) to use
*/
samplingMethodType?: SamplingMethodType;
}
/**
* Type of Profile Sample (percentage or rows)
*
* Used next to a `profileSample` number to say whether that number is a percentage of the
* data or a number of rows.
*/
export enum ProfileSampleType {
Percentage = "PERCENTAGE",
Rows = "ROWS",
}
/**
* Type of Sampling Method (BERNOULLI or SYSTEM)
*
* NOTE(review): presumably these map to the SQL TABLESAMPLE methods of the same names;
* confirm against the profiler implementation.
*/
export enum SamplingMethodType {
Bernoulli = "BERNOULLI",
System = "SYSTEM",
}

View file

@ -3,18 +3,35 @@
"javaType": "org.openmetadata.schema.type.DatabaseSchemaProfilerConfig",
"description": "This schema defines the type for Schema profile config.",
"properties": {
"profileSample": {
"description": "Percentage of data or no. of rows we want to execute the profiler and tests on",
"type": "number",
"default": 100,
"title": "Profile Sample"
},
"profileSampleType": {
"description": "Type of Profile Sample (percentage or rows)",
"type": "string",
"enum": ["PERCENTAGE", "ROWS"],
"default": "PERCENTAGE",
"title": "Profile Sample Value"
"profileSampleConfig": {
"title": "Profile Sample Configuration",
"type": "object",
"properties": {
"sampleConfigType": {
"type": "string",
"enum": ["STATIC", "DYNAMIC"],
"default": "STATIC"
},
"config": {
"title": "Sampling Configuration",
"type": "object",
"properties": {
"profileSampleType": {
"description": "Type of Profile Sample (percentage or rows)",
"type": "string",
"enum": ["PERCENTAGE", "ROWS"],
"default": "PERCENTAGE",
"title": "Profile Sample Value"
},
"profileSample": {
"description": "Percentage of data or no. of rows we want to execute the profiler and tests on",
"type": "number",
"default": 100,
"title": "Profile Sample"
}
}
}
}
},
"sampleDataCount": {
"description": "Number of rows of sample data to be generated",

View file

@ -650,6 +650,7 @@
"drive-plural": "محركات الأقراص",
"duplicate": "تكرار",
"duration": "المدة",
"dynamic": "ديناميكي",
"dynamic-assertion": "تأكيد ديناميكي",
"edge": "حافة",
"edge-bundling": "تجميع الحواف",
@ -1568,6 +1569,7 @@
"profile-config": "تكوين ملف التعريف",
"profile-lowercase": "ملف تعريف",
"profile-name": "اسم ملف التعريف",
"profile-sample": "عينة الملف الشخصي",
"profile-sample-type": "عينة ملف التعريف {{type}}",
"profiler": "محلل البيانات",
"profiler-configuration": "تكوين محلل البيانات",
@ -1724,6 +1726,7 @@
"row": "صف",
"row-count": "عدد الصفوف",
"row-count-lowercase": "عدد الصفوف",
"row-count-threshold": "حد عدد الصفوف",
"row-filter": "تصفية الصف",
"row-filter-plural": "تصفية الصفوف",
"row-limit": "حد الصف",
@ -1748,10 +1751,12 @@
"runs-for": "تشغيل لـ",
"s3-config-source": "مصدر تكوين S3",
"sample": "عينة",
"sample-config-type": "نوع تكوين العينة",
"sample-data": "بيانات العينة",
"sample-data-count": "عدد بيانات العينة",
"sample-data-count-lowercase": "عدد بيانات العينة",
"sample-data-ingestion-configuration": "إعداد استيعاب بيانات العينة",
"sampling-method-type": "نوع طريقة أخذ العينات",
"saturday": "السبت",
"save": "حفظ",
"save-changes": "حفظ التغييرات",
@ -1939,6 +1944,7 @@
"started-following": "بدأ المتابعة",
"starting-offset": "إزاحة البدء",
"state": "الحالة",
"static": "ثابت",
"status": "الحالة",
"stay-up-to-date": "ابقَ على اطلاع",
"step": "خطوة",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "سلاسل",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "حد",
"threshold-plural": "حدود",
"thursday": "الخميس",
"tier": "مستوى",
"tier-label-type": "نوع تسمية المستوى",

View file

@ -650,6 +650,7 @@
"drive-plural": "Laufwerke",
"duplicate": "Duplikat",
"duration": "Dauer",
"dynamic": "Dynamisch",
"dynamic-assertion": "Dynamische Assertion",
"edge": "Kante",
"edge-bundling": "Kantenbündelung",
@ -1568,6 +1569,7 @@
"profile-config": "Profil-Konfiguration",
"profile-lowercase": "profil",
"profile-name": "Name des Profils",
"profile-sample": "Profilstichprobe",
"profile-sample-type": "Profil-Sample-Typ {{type}}",
"profiler": "Profiler",
"profiler-configuration": "Profiler-Konfiguration",
@ -1724,6 +1726,7 @@
"row": "Zeile",
"row-count": "Zeilenzahl",
"row-count-lowercase": "Anzahl der Zeilen",
"row-count-threshold": "Zeilenanzahl-Schwellenwert",
"row-filter": "Zeilenfilter",
"row-filter-plural": "Zeilenfilter",
"row-limit": "Zeilenlimit",
@ -1748,10 +1751,12 @@
"runs-for": "Läuft für",
"s3-config-source": "S3-Konfigurationsquelle",
"sample": "Beispiel",
"sample-config-type": "Stichproben-Konfigurationstyp",
"sample-data": "Beispieldaten",
"sample-data-count": "Beispieldatenanzahl",
"sample-data-count-lowercase": "Anzahl der Beispieldaten",
"sample-data-ingestion-configuration": "Konfiguration der Beispieldaten-Aufnahme",
"sampling-method-type": "Stichprobenverfahrenstyp",
"saturday": "Samstag",
"save": "Speichern",
"save-changes": "Änderungen speichern",
@ -1939,6 +1944,7 @@
"started-following": "Hat begonnen zu folgen",
"starting-offset": "Start-Offset",
"state": "Zustand",
"static": "Statisch",
"status": "Status",
"stay-up-to-date": "Bleiben Sie auf dem neuesten Stand",
"step": "Schritt",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "Threads",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Schwellenwert",
"threshold-plural": "Schwellenwerte",
"thursday": "Donnerstag",
"tier": "Stufe",
"tier-label-type": "Stufen-Etikettentyp",

View file

@ -650,6 +650,7 @@
"drive-plural": "Drives",
"duplicate": "Duplicate",
"duration": "Duration",
"dynamic": "Dynamic",
"dynamic-assertion": "Dynamic Assertion",
"edge": "Edge",
"edge-bundling": "Edge Bundling",
@ -1568,7 +1569,8 @@
"profile-config": "Profile config",
"profile-lowercase": "profile",
"profile-name": "Profile Name",
"profile-sample-type": "Profile Sample {{type}}",
"profile-sample": "Profile Sample",
"profile-sample-type": "Profile Sample Type",
"profiler": "Profiler",
"profiler-configuration": "Profiler Configuration",
"profiler-ingestion": "Profiler Ingestion",
@ -1724,6 +1726,7 @@
"row": "Row",
"row-count": "Row Count",
"row-count-lowercase": "row count",
"row-count-threshold": "Row Count Threshold",
"row-filter": "Row Filter",
"row-filter-plural": "Row Filters",
"row-limit": "Row Limit",
@ -1748,10 +1751,12 @@
"runs-for": "Runs for",
"s3-config-source": "S3 Config Source",
"sample": "Sample",
"sample-config-type": "Sample Config Type",
"sample-data": "Sample Data",
"sample-data-count": "Sample Data Count",
"sample-data-count-lowercase": "sample data count",
"sample-data-ingestion-configuration": "Sample Data Ingestion Configuration",
"sampling-method-type": "Sampling Method Type",
"saturday": "Saturday",
"save": "Save",
"save-changes": "Save changes",
@ -1939,6 +1944,7 @@
"started-following": "Started following",
"starting-offset": "Starting Offset",
"state": "State",
"static": "Static",
"status": "Status",
"stay-up-to-date": "Stay Up-to-date",
"step": "Step",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "threads",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Threshold",
"threshold-plural": "Thresholds",
"thursday": "Thursday",
"tier": "Tier",
"tier-label-type": "Tier Label Type",

View file

@ -650,6 +650,7 @@
"drive-plural": "Unidades",
"duplicate": "Duplicar",
"duration": "Duración",
"dynamic": "Dinámico",
"dynamic-assertion": "Afirmación dinámica",
"edge": "Arista",
"edge-bundling": "Agrupación de Aristas",
@ -1568,6 +1569,7 @@
"profile-config": "Configuración del perfil",
"profile-lowercase": "perfil",
"profile-name": "Nombre del perfil",
"profile-sample": "Muestra de perfil",
"profile-sample-type": "Muestra de perfil {{type}}",
"profiler": "Perfilador",
"profiler-configuration": "Configuración del Perfilador",
@ -1724,6 +1726,7 @@
"row": "Fila",
"row-count": "Conteo Fila",
"row-count-lowercase": "número de filas",
"row-count-threshold": "Umbral de recuento de filas",
"row-filter": "Filtro de fila",
"row-filter-plural": "Filtros de fila",
"row-limit": "Límite de filas",
@ -1748,10 +1751,12 @@
"runs-for": "Ejecuciones para",
"s3-config-source": "Fuente de Configuración S3",
"sample": "Muestra",
"sample-config-type": "Tipo de configuración de muestra",
"sample-data": "Datos de Muestra",
"sample-data-count": "Número de datos de muestra",
"sample-data-count-lowercase": "número de datos de muestra",
"sample-data-ingestion-configuration": "Configuración de Ingesta de Datos de Ejemplo",
"sampling-method-type": "Tipo de método de muestreo",
"saturday": "Sábado",
"save": "Guardar",
"save-changes": "Guardar cambios",
@ -1939,6 +1944,7 @@
"started-following": "Comenzó a seguir",
"starting-offset": "Desplazamiento inicial",
"state": "Estado",
"static": "Estático",
"status": "Estado",
"stay-up-to-date": "Manténgase Actualizado",
"step": "Paso",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "hilos",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Umbral",
"threshold-plural": "Umbrales",
"thursday": "Jueves",
"tier": "Nivel",
"tier-label-type": "Tipo de Etiqueta de Nivel",

View file

@ -650,6 +650,7 @@
"drive-plural": "Lecteurs",
"duplicate": "Dupliquer",
"duration": "Durée",
"dynamic": "Dynamique",
"dynamic-assertion": "Assertion Dynamique",
"edge": "Bord",
"edge-bundling": "Regroupement des Arêtes",
@ -1568,6 +1569,7 @@
"profile-config": "Configuration de Profil",
"profile-lowercase": "profil",
"profile-name": "Nom du Profil",
"profile-sample": "Échantillon de profil",
"profile-sample-type": "Échantillon du Profil {{type}}",
"profiler": "Profilage",
"profiler-configuration": "Configuration du profileur",
@ -1724,6 +1726,7 @@
"row": "Ligne",
"row-count": "Nombre de Lignes",
"row-count-lowercase": "Nombre de Ligne",
"row-count-threshold": "Seuil de nombre de lignes",
"row-filter": "Filtre de ligne",
"row-filter-plural": "Filtres de ligne",
"row-limit": "Limite de lignes",
@ -1748,10 +1751,12 @@
"runs-for": "Exécutions pour",
"s3-config-source": "Source de Configuration S3",
"sample": "Échantillon",
"sample-config-type": "Type de configuration d'échantillon",
"sample-data": "Échantillon de Données",
"sample-data-count": "Nombre de données Echantillon",
"sample-data-count-lowercase": "nombre de données échantillon",
"sample-data-ingestion-configuration": "Configuration d'Ingestion des Données d'Exemple",
"sampling-method-type": "Type de méthode d'échantillonnage",
"saturday": "Samedi",
"save": "Enregistrer",
"save-changes": "Enregistrer les modifications",
@ -1939,6 +1944,7 @@
"started-following": "A commencé à suivre",
"starting-offset": "Décalage de départ",
"state": "État",
"static": "Statique",
"status": "Statut",
"stay-up-to-date": "Rester à Jour",
"step": "Étape",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "fils",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Seuil",
"threshold-plural": "Seuils",
"thursday": "Jeudi",
"tier": "Niveau",
"tier-label-type": "Type d'Étiquette de Niveau",

View file

@ -650,6 +650,7 @@
"drive-plural": "Unidades",
"duplicate": "Duplicar",
"duration": "Duración",
"dynamic": "Dinámico",
"dynamic-assertion": "Aserción dinámica",
"edge": "Borde",
"edge-bundling": "Agrupamento de Arestas",
@ -1568,6 +1569,7 @@
"profile-config": "Configuración do perfil",
"profile-lowercase": "perfil",
"profile-name": "Nome do perfil",
"profile-sample": "Mostra de perfil",
"profile-sample-type": "Perfil de mostra {{type}}",
"profiler": "Perfilador",
"profiler-configuration": "Configuración do perfilador",
@ -1724,6 +1726,7 @@
"row": "Fila",
"row-count": "Reconto de filas",
"row-count-lowercase": "reconto de filas",
"row-count-threshold": "Limiar de contaxe de filas",
"row-filter": "Filtro de fila",
"row-filter-plural": "Filtros de fila",
"row-limit": "Límite de filas",
@ -1748,10 +1751,12 @@
"runs-for": "Executado para",
"s3-config-source": "Fonte de configuración S3",
"sample": "Mostra",
"sample-config-type": "Tipo de configuración de mostra",
"sample-data": "Datos de mostra",
"sample-data-count": "Reconto de datos de mostra",
"sample-data-count-lowercase": "reconto de datos de mostra",
"sample-data-ingestion-configuration": "Configuración de Inxesta de Datos de Exemplo",
"sampling-method-type": "Tipo de método de mostraxe",
"saturday": "Sábado",
"save": "Gardar",
"save-changes": "Gardar cambios",
@ -1939,6 +1944,7 @@
"started-following": "Comezou a seguir",
"starting-offset": "Desprazamento inicial",
"state": "Estado",
"static": "Estático",
"status": "Estado",
"stay-up-to-date": "Mantente ao día",
"step": "Paso",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "fíos",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Limiar",
"threshold-plural": "Limiares",
"thursday": "Xoves",
"tier": "Nivel",
"tier-label-type": "Tipo de Etiqueta de Nivel",

View file

@ -650,6 +650,7 @@
"drive-plural": "כוננים",
"duplicate": "שכפל",
"duration": "משך זמן",
"dynamic": "דינמי",
"dynamic-assertion": "אסרציה דינמית",
"edge": "קצה",
"edge-bundling": "צרור קשתות",
@ -1568,6 +1569,7 @@
"profile-config": "תצורת פרופיל",
"profile-lowercase": "פרופיל",
"profile-name": "שם הפרופיל",
"profile-sample": "דגימת פרופיל",
"profile-sample-type": "דוגמת פרופיל {{type}}",
"profiler": "מדד ואיכות נתונים",
"profiler-configuration": "תצורת פרופיילר",
@ -1724,6 +1726,7 @@
"row": "שורה",
"row-count": "מספר שורות",
"row-count-lowercase": "מספר שורות",
"row-count-threshold": "סף ספירת שורות",
"row-filter": "מסנן שורות",
"row-filter-plural": "מסנני שורות",
"row-limit": "מגבלת שורות",
@ -1748,10 +1751,12 @@
"runs-for": "ריצות עבור",
"s3-config-source": "מקור הגדרות S3",
"sample": "דוגמה",
"sample-config-type": "סוג תצורת דגימה",
"sample-data": "נתוני דוגמה",
"sample-data-count": "מספר נתוני דוגמה",
"sample-data-count-lowercase": "מספר נתוני דוגמה",
"sample-data-ingestion-configuration": "הגדרת קליטת נתוני דוגמה",
"sampling-method-type": "סוג שיטת דגימה",
"saturday": "יום שבת",
"save": "שמור",
"save-changes": "שמירת שינויים",
@ -1939,6 +1944,7 @@
"started-following": "התחיל לעקוב",
"starting-offset": "היסט התחלתי",
"state": "מצב",
"static": "סטטי",
"status": "סטטוס",
"stay-up-to-date": "הישאר מעודכן",
"step": "שלב",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "נושאים",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "סף",
"threshold-plural": "ספים",
"thursday": "יום חמישי",
"tier": "שכבת מידע",
"tier-label-type": "סוג תווית שכבת מידע",

View file

@ -650,6 +650,7 @@
"drive-plural": "ドライブ",
"duplicate": "重複",
"duration": "所要時間",
"dynamic": "動的",
"dynamic-assertion": "動的アサーション",
"edge": "エッジ",
"edge-bundling": "エッジバンドリング",
@ -1568,6 +1569,7 @@
"profile-config": "プロファイル設定",
"profile-lowercase": "プロファイル",
"profile-name": "プロファイル名",
"profile-sample": "プロファイルサンプル",
"profile-sample-type": "サンプル {{type}} のプロファイル",
"profiler": "プロファイラー",
"profiler-configuration": "プロファイラー設定",
@ -1724,6 +1726,7 @@
"row": "行",
"row-count": "行数",
"row-count-lowercase": "行数",
"row-count-threshold": "行数しきい値",
"row-filter": "行フィルター",
"row-filter-plural": "行フィルター",
"row-limit": "行数制限",
@ -1748,10 +1751,12 @@
"runs-for": "対象:",
"s3-config-source": "S3 設定ソース",
"sample": "サンプル",
"sample-config-type": "サンプル設定タイプ",
"sample-data": "サンプルデータ",
"sample-data-count": "サンプルデータ件数",
"sample-data-count-lowercase": "サンプルデータ件数",
"sample-data-ingestion-configuration": "サンプルデータ取り込み設定",
"sampling-method-type": "サンプリング方法タイプ",
"saturday": "土曜日",
"save": "保存",
"save-changes": "変更を保存",
@ -1939,6 +1944,7 @@
"started-following": "フォローを開始しました",
"starting-offset": "開始オフセット",
"state": "状態",
"static": "静的",
"status": "ステータス",
"stay-up-to-date": "常に最新情報を取得",
"step": "ステップ",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "スレッド",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "しきい値",
"threshold-plural": "しきい値",
"thursday": "木曜日",
"tier": "ティア",
"tier-label-type": "ティアラベルタイプ",

View file

@ -650,6 +650,7 @@
"drive-plural": "드라이브",
"duplicate": "복제",
"duration": "기간",
"dynamic": "동적",
"dynamic-assertion": "동적 단언",
"edge": "엣지",
"edge-bundling": "엣지 번들링",
@ -1568,6 +1569,7 @@
"profile-config": "프로필 구성",
"profile-lowercase": "프로필",
"profile-name": "프로필 이름",
"profile-sample": "프로파일 샘플",
"profile-sample-type": "프로필 샘플 {{type}}",
"profiler": "프로파일러",
"profiler-configuration": "프로파일러 구성",
@ -1724,6 +1726,7 @@
"row": "행",
"row-count": "행 수",
"row-count-lowercase": "행 수",
"row-count-threshold": "행 수 임계값",
"row-filter": "행 필터",
"row-filter-plural": "행 필터",
"row-limit": "행 제한",
@ -1748,10 +1751,12 @@
"runs-for": "실행 기간",
"s3-config-source": "S3 설정 소스",
"sample": "샘플",
"sample-config-type": "샘플 구성 유형",
"sample-data": "샘플 데이터",
"sample-data-count": "샘플 데이터 수",
"sample-data-count-lowercase": "샘플 데이터 수",
"sample-data-ingestion-configuration": "샘플 데이터 수집 구성",
"sampling-method-type": "샘플링 방법 유형",
"saturday": "토요일",
"save": "저장",
"save-changes": "변경 사항 저장",
@ -1939,6 +1944,7 @@
"started-following": "팔로우 시작됨",
"starting-offset": "시작 오프셋",
"state": "상태",
"static": "정적",
"status": "현황",
"stay-up-to-date": "최신 정보 유지",
"step": "단계",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "스레드들",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "임계값",
"threshold-plural": "임계값",
"thursday": "목요일",
"tier": "계층",
"tier-label-type": "계층 라벨 유형",

View file

@ -650,6 +650,7 @@
"drive-plural": "ड्राइव्हस्",
"duplicate": "डुप्लिकेट",
"duration": "कालावधी",
"dynamic": "गतिमान",
"dynamic-assertion": "डायनॅमिक अॅसर्शन",
"edge": "काठ",
"edge-bundling": "कड्यांचे गटबंधन",
@ -1568,6 +1569,7 @@
"profile-config": "प्रोफाइल कॉन्फिग",
"profile-lowercase": "प्रोफाइल",
"profile-name": "प्रोफाइल नाव",
"profile-sample": "प्रोफाइल नमुना",
"profile-sample-type": "प्रोफाइल नमुना {{type}}",
"profiler": "प्रोफाइलर",
"profiler-configuration": "प्रोफाइलर संरचना",
@ -1724,6 +1726,7 @@
"row": "पंक्ति",
"row-count": "Row Count",
"row-count-lowercase": "पंक्ति संख्या",
"row-count-threshold": "पंक्ती संख्या उंबरठा",
"row-filter": "पंक्ती फिल्टर",
"row-filter-plural": "पंक्ती फिल्टर",
"row-limit": "पंक्ती मर्यादा",
@ -1748,10 +1751,12 @@
"runs-for": "साठी चालते",
"s3-config-source": "S3 कॉन्फिग स्रोत",
"sample": "नमुना",
"sample-config-type": "नमुना कॉन्फिग प्रकार",
"sample-data": "नमुना डेटा",
"sample-data-count": "नमुना डेटा संख्या",
"sample-data-count-lowercase": "नमुना डेटा संख्या",
"sample-data-ingestion-configuration": "नमुना डेटा अंतर्ग्रहण कॉन्फिगरेशन",
"sampling-method-type": "नमुना पद्धत प्रकार",
"saturday": "शनिवार",
"save": "जतन करा",
"save-changes": "बदल जतन करा",
@ -1939,6 +1944,7 @@
"started-following": "अनुसरण करणे प्रारंभ केले",
"starting-offset": "प्रारंभिक ऑफसेट",
"state": "अवस्था",
"static": "स्थिर",
"status": "स्थिती",
"stay-up-to-date": "अप-टू-डेट रहा",
"step": "पाऊल",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "थ्रेड्स",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "उंबरठा",
"threshold-plural": "उंबरठे",
"thursday": "गुरुवार",
"tier": "स्तर",
"tier-label-type": "स्तर लेबल प्रकार",

View file

@ -650,6 +650,7 @@
"drive-plural": "Schijven",
"duplicate": "Dupliceren",
"duration": "Duur",
"dynamic": "Dynamisch",
"dynamic-assertion": "Dynamic Assertion",
"edge": "Verbinding",
"edge-bundling": "Kantenbundeling",
@ -1568,6 +1569,7 @@
"profile-config": "Profielconfiguratie",
"profile-lowercase": "profiel",
"profile-name": "Profielnaam",
"profile-sample": "Profielsteekproef",
"profile-sample-type": "Voorbeeldprofiel {{type}}",
"profiler": "Profiler",
"profiler-configuration": "Profiler Configuration",
@ -1724,6 +1726,7 @@
"row": "Rij",
"row-count": "Aantal rijen",
"row-count-lowercase": "aantal rijen",
"row-count-threshold": "Drempelwaarde rijenaantal",
"row-filter": "Rijfilter",
"row-filter-plural": "Rijfilters",
"row-limit": "Rijlimiet",
@ -1748,10 +1751,12 @@
"runs-for": "Uitgevoerd voor",
"s3-config-source": "S3 Configuratiebron",
"sample": "Voorbeeld",
"sample-config-type": "Type steekproefconfiguratie",
"sample-data": "Voorbeelddata",
"sample-data-count": "Aantal voorbeelddata",
"sample-data-count-lowercase": "aantal voorbeelddata",
"sample-data-ingestion-configuration": "Configuratie van Voorbeeldgegevens Opname",
"sampling-method-type": "Type steekproefmethode",
"saturday": "zaterdag",
"save": "Opslaan",
"save-changes": "Wijzigingen opslaan",
@ -1939,6 +1944,7 @@
"started-following": "Begonnen met volgen",
"starting-offset": "Startoffset",
"state": "Toestand",
"static": "Statisch",
"status": "Status",
"stay-up-to-date": "Blijf Up-to-date",
"step": "Stap",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "draadjes",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Drempelwaarde",
"threshold-plural": "Drempelwaarden",
"thursday": "donderdag",
"tier": "Niveau",
"tier-label-type": "Niveau-etikettype",

View file

@ -650,6 +650,7 @@
"drive-plural": "درایوها",
"duplicate": "تکراری",
"duration": "مدت زمان",
"dynamic": "Dinâmico",
"dynamic-assertion": "ادعای پویا",
"edge": "لبه",
"edge-bundling": "دسته‌بندی یال‌ها",
@ -1568,6 +1569,7 @@
"profile-config": "پیکربندی پروفایل",
"profile-lowercase": "پروفایل",
"profile-name": "نام پروفایل",
"profile-sample": "Amostra de perfil",
"profile-sample-type": "نوع نمونه پروفایل {{type}}",
"profiler": "پروفایلر",
"profiler-configuration": "پیکربندی پروفایلر",
@ -1724,6 +1726,7 @@
"row": "ردیف",
"row-count": "Row Count",
"row-count-lowercase": "تعداد ردیف",
"row-count-threshold": "Limiar de contagem de linhas",
"row-filter": "ਕਤਾਰ ਫਿਲਟਰ",
"row-filter-plural": "ਕਤਾਰ ਫਿਲਟਰ",
"row-limit": "ਕਤਾਰ ਸੀਮਾ",
@ -1748,10 +1751,12 @@
"runs-for": "اجرا می‌شود برای",
"s3-config-source": "منبع پیکربندی S3",
"sample": "نمونه",
"sample-config-type": "Tipo de configuração de amostra",
"sample-data": "داده‌های نمونه",
"sample-data-count": "تعداد داده‌های نمونه",
"sample-data-count-lowercase": "تعداد داده‌های نمونه",
"sample-data-ingestion-configuration": "Sample Data Ingestion Configuration",
"sampling-method-type": "Tipo de método de amostragem",
"saturday": "شنبه",
"save": "ذخیره",
"save-changes": "تبدیلیاں محفوظ کریں",
@ -1939,6 +1944,7 @@
"started-following": "شروع به دنبال کردن شد",
"starting-offset": "آفست شروع",
"state": "Estado",
"static": "Estático",
"status": "وضعیت",
"stay-up-to-date": "در جریان بمانید",
"step": "مرحله",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "رشته‌ها",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Limiar",
"threshold-plural": "Limiares",
"thursday": "پنج‌شنبه",
"tier": "سطح",
"tier-label-type": "نوع برچسب سطح",

View file

@ -650,6 +650,7 @@
"drive-plural": "Unidades",
"duplicate": "Duplicar",
"duration": "Duração",
"dynamic": "Dinâmico",
"dynamic-assertion": "Asserção dinâmica",
"edge": "Borda",
"edge-bundling": "Agrupamento de Arestas",
@ -1568,6 +1569,7 @@
"profile-config": "Configuração de Perfil",
"profile-lowercase": "perfil",
"profile-name": "Nome do Perfil",
"profile-sample": "Amostra de perfil",
"profile-sample-type": "Amostra de Perfil {{type}}",
"profiler": "Criador de perfil",
"profiler-configuration": "Configuração do Profiler",
@ -1724,6 +1726,7 @@
"row": "Linha",
"row-count": "Contagem de linhas",
"row-count-lowercase": "contagem de linhas",
"row-count-threshold": "Limiar de contagem de linhas",
"row-filter": "Filtro de linha",
"row-filter-plural": "Filtros de linha",
"row-limit": "Limite de linhas",
@ -1748,10 +1751,12 @@
"runs-for": "Executa por",
"s3-config-source": "Fonte de Configuração S3",
"sample": "Amostra",
"sample-config-type": "Tipo de configuração de amostra",
"sample-data": "Dados de Amostra",
"sample-data-count": "Contagem de Dados de Amostra",
"sample-data-count-lowercase": "contagem de dados de amostra",
"sample-data-ingestion-configuration": "Configuração de Ingestão de Dados de Exemplo",
"sampling-method-type": "Tipo de método de amostragem",
"saturday": "Sábado",
"save": "Salvar",
"save-changes": "Salvar alterações",
@ -1939,6 +1944,7 @@
"started-following": "Começou a seguir",
"starting-offset": "Deslocamento inicial",
"state": "Estado",
"static": "Estático",
"status": "Status",
"stay-up-to-date": "Mantenha-se Atualizado",
"step": "Etapa",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "tópicos",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Limiar",
"threshold-plural": "Limiares",
"thursday": "Quinta-feira",
"tier": "Camada",
"tier-label-type": "Tipo de Rótulo de Camada",

View file

@ -650,6 +650,7 @@
"drive-plural": "Unidades",
"duplicate": "Duplicar",
"duration": "Duração",
"dynamic": "Dinâmico",
"dynamic-assertion": "Asserção Dinâmica",
"edge": "Borda",
"edge-bundling": "Agrupamento de Arestas",
@ -1568,6 +1569,7 @@
"profile-config": "Configuração de Perfil",
"profile-lowercase": "perfil",
"profile-name": "Nome do Perfil",
"profile-sample": "Amostra de perfil",
"profile-sample-type": "Amostra de Perfil {{type}}",
"profiler": "Profiler",
"profiler-configuration": "Configuração do Profiler",
@ -1724,6 +1726,7 @@
"row": "Linha",
"row-count": "Contagem de linhas",
"row-count-lowercase": "contagem de linhas",
"row-count-threshold": "Limiar de contagem de linhas",
"row-filter": "Filtro de linha",
"row-filter-plural": "Filtros de linha",
"row-limit": "Limite de linhas",
@ -1748,10 +1751,12 @@
"runs-for": "Executa por",
"s3-config-source": "Fonte de Configuração S3",
"sample": "Amostra",
"sample-config-type": "Tipo de configuração de amostra",
"sample-data": "Dados de Amostra",
"sample-data-count": "Contagem de Dados de Amostra",
"sample-data-count-lowercase": "contagem de dados de amostra",
"sample-data-ingestion-configuration": "Configuração de Ingestão de Dados de Exemplo",
"sampling-method-type": "Tipo de método de amostragem",
"saturday": "Sábado",
"save": "Salvar",
"save-changes": "Guardar alterações",
@ -1939,6 +1944,7 @@
"started-following": "Começou a seguir",
"starting-offset": "Offset Inicial",
"state": "Estado",
"static": "Estático",
"status": "Estado",
"stay-up-to-date": "Mantenha-se Atualizado",
"step": "Passo",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "discussões",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Limiar",
"threshold-plural": "Limiares",
"thursday": "Quinta-feira",
"tier": "Camada",
"tier-label-type": "Tipo de Etiqueta de Camada",

View file

@ -650,6 +650,7 @@
"drive-plural": "Диски",
"duplicate": "Дубликат",
"duration": "Длительность",
"dynamic": "Динамический",
"dynamic-assertion": "Динамическое утверждение",
"edge": "связь",
"edge-bundling": "Группировка рёбер",
@ -1568,6 +1569,7 @@
"profile-config": "Конфигурация профиля",
"profile-lowercase": "профиль",
"profile-name": "Имя профиля",
"profile-sample": "Выборка профиля",
"profile-sample-type": "Образец профиля {{type}}",
"profiler": "Профайлер",
"profiler-configuration": "Конфигурация профайлера",
@ -1724,6 +1726,7 @@
"row": "Строка",
"row-count": "Количество строк",
"row-count-lowercase": "количество строк",
"row-count-threshold": "Порог количества строк",
"row-filter": "Фильтр строк",
"row-filter-plural": "Фильтры строк",
"row-limit": "Лимит строк",
@ -1748,10 +1751,12 @@
"runs-for": "Запущено для",
"s3-config-source": "Источник конфигурации S3",
"sample": "Пример",
"sample-config-type": "Тип конфигурации выборки",
"sample-data": "Пример данных",
"sample-data-count": "Количество примеров данных",
"sample-data-count-lowercase": "количество примеров данных",
"sample-data-ingestion-configuration": "Настройка загрузки примеров данных",
"sampling-method-type": "Тип метода выборки",
"saturday": "Суббота",
"save": "Сохранить",
"save-changes": "Сохранить изменения",
@ -1939,6 +1944,7 @@
"started-following": "Начало отслеживания",
"starting-offset": "Начальное смещение",
"state": "Состояние",
"static": "Статический",
"status": "Статус",
"stay-up-to-date": "Будьте в курсе последних событий",
"step": "Шаг",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "ветки",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Порог",
"threshold-plural": "Пороговые значения",
"thursday": "Четверг",
"tier": "Критичность",
"tier-label-type": "Тип метки уровня",

View file

@ -650,6 +650,7 @@
"drive-plural": "ไดรฟ์",
"duplicate": "ซ้ำกัน",
"duration": "ระยะเวลา",
"dynamic": "ไดนามิก",
"dynamic-assertion": "การยืนยันแบบไดนามิก",
"edge": "ขอบ",
"edge-bundling": "การรวมกลุ่มเส้นเชื่อม",
@ -1568,6 +1569,7 @@
"profile-config": "การกำหนดค่าโปรไฟล์",
"profile-lowercase": "โปรไฟล์",
"profile-name": "ชื่อโปรไฟล์",
"profile-sample": "ตัวอย่างโปรไฟล์",
"profile-sample-type": "ตัวอย่างโปรไฟล์ {{type}}",
"profiler": "โปรไฟล์เลอร์",
"profiler-configuration": "การกำหนดค่าโปรไฟล์เลอร์",
@ -1724,6 +1726,7 @@
"row": "แถว",
"row-count": "จำนวนแถว",
"row-count-lowercase": "จำนวนแถว",
"row-count-threshold": "เกณฑ์จำนวนแถว",
"row-filter": "ตัวกรองแถว",
"row-filter-plural": "ตัวกรองแถว",
"row-limit": "จำกัดแถว",
@ -1748,10 +1751,12 @@
"runs-for": "รันสำหรับ",
"s3-config-source": "แหล่งที่มาของการตั้งค่า S3",
"sample": "ตัวอย่าง",
"sample-config-type": "ประเภทการกำหนดค่าตัวอย่าง",
"sample-data": "ข้อมูลตัวอย่าง",
"sample-data-count": "จำนวนข้อมูลตัวอย่าง",
"sample-data-count-lowercase": "จำนวนข้อมูลตัวอย่าง",
"sample-data-ingestion-configuration": "การกำหนดค่าการนำเข้าข้อมูลตัวอย่าง",
"sampling-method-type": "ประเภทวิธีการสุ่มตัวอย่าง",
"saturday": "วันเสาร์",
"save": "บันทึก",
"save-changes": "บันทึกการเปลี่ยนแปลง",
@ -1939,6 +1944,7 @@
"started-following": "เริ่มติดตาม",
"starting-offset": "ออฟเซ็ตเริ่มต้น",
"state": "สถานะ",
"static": "สถิต",
"status": "สถานะ",
"stay-up-to-date": "อัปเดตอยู่เสมอ",
"step": "ขั้นตอน",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "กระทู้หลายรายการ",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "เกณฑ์",
"threshold-plural": "เกณฑ์",
"thursday": "วันพฤหัสบดี",
"tier": "ระดับ",
"tier-label-type": "ประเภทป้ายระดับ",

View file

@ -650,6 +650,7 @@
"drive-plural": "Sürücüler",
"duplicate": "Kopya",
"duration": "Süre",
"dynamic": "Dinamik",
"dynamic-assertion": "Dinamik Beyan",
"edge": "Kenar",
"edge-bundling": "Kenar Gruplama",
@ -1568,6 +1569,7 @@
"profile-config": "Profil yapılandırması",
"profile-lowercase": "profil",
"profile-name": "Profil Adı",
"profile-sample": "Profil Örneği",
"profile-sample-type": "Profil Örneği {{type}}",
"profiler": "Profilleyici",
"profiler-configuration": "Profilleyici Yapılandırması",
@ -1724,6 +1726,7 @@
"row": "Satır",
"row-count": "Satır Sayısı",
"row-count-lowercase": "satır sayısı",
"row-count-threshold": "Satır Sayısı Eşiği",
"row-filter": "Satır filtresi",
"row-filter-plural": "Satır filtreleri",
"row-limit": "Satır Limiti",
@ -1748,10 +1751,12 @@
"runs-for": "Şunun için çalışır",
"s3-config-source": "S3 Yapılandırma Kaynağı",
"sample": "Örnek",
"sample-config-type": "Örnek Yapılandırma Türü",
"sample-data": "Örnek Veri",
"sample-data-count": "Örnek Veri Sayısı",
"sample-data-count-lowercase": "örnek veri sayısı",
"sample-data-ingestion-configuration": "Örnek Veri Alım Yapılandırması",
"sampling-method-type": "Örnekleme Yöntemi Türü",
"saturday": "Cumartesi",
"save": "Kaydet",
"save-changes": "Değişiklikleri kaydet",
@ -1939,6 +1944,7 @@
"started-following": "Takip etmeye başladı",
"starting-offset": "Başlangıç Ofseti",
"state": "Durum",
"static": "Statik",
"status": "Durum",
"stay-up-to-date": "Güncel Kalın",
"step": "Adım",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "konu başlıkları",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "Eşik",
"threshold-plural": "Eşikler",
"thursday": "Perşembe",
"tier": "Katman",
"tier-label-type": "Katman Etiketi Türü",

View file

@ -650,6 +650,7 @@
"drive-plural": "驱动器",
"duplicate": "复制",
"duration": "持续时间",
"dynamic": "动态",
"dynamic-assertion": "动态断言",
"edge": "连线",
"edge-bundling": "边捆绑",
@ -1568,6 +1569,7 @@
"profile-config": "分析器配置",
"profile-lowercase": "分析",
"profile-name": "分析器名称",
"profile-sample": "配置文件样本",
"profile-sample-type": "分析样本{{type}}",
"profiler": "分析器",
"profiler-configuration": "分析器配置",
@ -1724,6 +1726,7 @@
"row": "行",
"row-count": "行数",
"row-count-lowercase": "行计数",
"row-count-threshold": "行数阈值",
"row-filter": "行过滤器",
"row-filter-plural": "行过滤器",
"row-limit": "行数限制",
@ -1748,10 +1751,12 @@
"runs-for": "运行时长",
"s3-config-source": "S3 配置源",
"sample": "样本",
"sample-config-type": "采样配置类型",
"sample-data": "样本数据",
"sample-data-count": "样本数据计数",
"sample-data-count-lowercase": "样本数据计数",
"sample-data-ingestion-configuration": "示例数据采集配置",
"sampling-method-type": "采样方法类型",
"saturday": "星期六",
"save": "保存",
"save-changes": "保存更改",
@ -1939,6 +1944,7 @@
"started-following": "开始关注",
"starting-offset": "起始偏移量",
"state": "状态",
"static": "静态",
"status": "状态",
"stay-up-to-date": "保持最新",
"step": "步骤",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "线程",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "阈值",
"threshold-plural": "阈值",
"thursday": "星期四",
"tier": "分级",
"tier-label-type": "分级标签类型",

View file

@ -650,6 +650,7 @@
"drive-plural": "驅動器",
"duplicate": "重複",
"duration": "持續時間",
"dynamic": "動態",
"dynamic-assertion": "動態斷言",
"edge": "邊緣",
"edge-bundling": "邊捆綁",
@ -1568,6 +1569,7 @@
"profile-config": "設定檔組態",
"profile-lowercase": "設定檔",
"profile-name": "設定檔名稱",
"profile-sample": "設定檔樣本",
"profile-sample-type": "設定檔範例 {{type}}",
"profiler": "分析器",
"profiler-configuration": "分析器組態",
@ -1724,6 +1726,7 @@
"row": "資料列",
"row-count": "資料列計數",
"row-count-lowercase": "資料列計數",
"row-count-threshold": "行數閾值",
"row-filter": "列篩選器",
"row-filter-plural": "列篩選器",
"row-limit": "列數限制",
@ -1748,10 +1751,12 @@
"runs-for": "執行於",
"s3-config-source": "S3 組態來源",
"sample": "範例",
"sample-config-type": "抽樣設定類型",
"sample-data": "範例資料",
"sample-data-count": "範例資料計數",
"sample-data-count-lowercase": "範例資料計數",
"sample-data-ingestion-configuration": "範例資料擷取設定",
"sampling-method-type": "抽樣方法類型",
"saturday": "星期六",
"save": "儲存",
"save-changes": "儲存更改",
@ -1939,6 +1944,7 @@
"started-following": "開始追蹤",
"starting-offset": "起始偏移量",
"state": "狀態",
"static": "靜態",
"status": "狀態",
"stay-up-to-date": "保持最新",
"step": "步驟",
@ -2088,6 +2094,8 @@
"thread-plural-lowercase": "對話",
"three-dash-symbol": "---",
"three-dots-symbol": "•••",
"threshold": "閾值",
"threshold-plural": "閾值",
"thursday": "星期四",
"tier": "層級",
"tier-label-type": "層級標籤類型",