diff --git a/bootstrap/sql/migrations/native/1.13.0/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/1.13.0/mysql/schemaChanges.sql index e16a031b425..97b95dbcf80 100644 --- a/bootstrap/sql/migrations/native/1.13.0/mysql/schemaChanges.sql +++ b/bootstrap/sql/migrations/native/1.13.0/mysql/schemaChanges.sql @@ -130,6 +130,86 @@ FROM user_entity ue, role_entity re WHERE ue.name = 'mcpapplicationbot' AND re.name = 'ApplicationBotImpersonationRole'; + +UPDATE entity_extension +SET json = JSON_SET( + json, + '$.profileSampleConfig', + JSON_OBJECT( + 'sampleConfigType', 'STATIC', + 'config', JSON_OBJECT( + 'profileSample', JSON_EXTRACT(json, '$.profileSample'), + 'profileSampleType', COALESCE( + JSON_EXTRACT(json, '$.profileSampleType'), + CAST('"PERCENTAGE"' AS JSON) + ), + 'samplingMethodType', JSON_EXTRACT(json, '$.samplingMethodType') + ) + ) +) +WHERE extension IN ( + 'table.tableProfilerConfig', + 'database.databaseProfilerConfig', + 'databaseSchema.databaseSchemaProfilerConfig' +) + AND JSON_EXTRACT(json, '$.profileSample') IS NOT NULL + AND JSON_TYPE(JSON_EXTRACT(json, '$.profileSample')) != 'NULL' + AND NOT JSON_CONTAINS_PATH(json, 'one', '$.profileSampleConfig'); + +-- entity_extension: remove old flat fields +UPDATE entity_extension +SET json = JSON_REMOVE( + JSON_REMOVE( + JSON_REMOVE(json, '$.samplingMethodType'), + '$.profileSampleType' + ), + '$.profileSample' +) +WHERE extension IN ( + 'table.tableProfilerConfig', + 'database.databaseProfilerConfig', + 'databaseSchema.databaseSchemaProfilerConfig' +) + AND (JSON_CONTAINS_PATH(json, 'one', '$.profileSample') + OR JSON_CONTAINS_PATH(json, 'one', '$.profileSampleType') + OR JSON_CONTAINS_PATH(json, 'one', '$.samplingMethodType')); + +-- ingestion_pipeline_entity (profiler pipelines): build profileSampleConfig (skip if already migrated) +UPDATE ingestion_pipeline_entity +SET json = JSON_SET( + json, + '$.sourceConfig.config.profileSampleConfig', + JSON_OBJECT( + 'sampleConfigType', 'STATIC', + 
'config', JSON_OBJECT( + 'profileSample', JSON_EXTRACT(json, '$.sourceConfig.config.profileSample'), + 'profileSampleType', COALESCE( + JSON_EXTRACT(json, '$.sourceConfig.config.profileSampleType'), + CAST('"PERCENTAGE"' AS JSON) + ), + 'samplingMethodType', JSON_EXTRACT(json, '$.sourceConfig.config.samplingMethodType') + ) + ) +) +WHERE pipelineType = 'profiler' + AND JSON_EXTRACT(json, '$.sourceConfig.config.profileSample') IS NOT NULL + AND JSON_TYPE(JSON_EXTRACT(json, '$.sourceConfig.config.profileSample')) != 'NULL' + AND NOT JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.profileSampleConfig'); + +-- ingestion_pipeline_entity (profiler pipelines): remove old flat fields +UPDATE ingestion_pipeline_entity +SET json = JSON_REMOVE( + JSON_REMOVE( + JSON_REMOVE(json, '$.sourceConfig.config.samplingMethodType'), + '$.sourceConfig.config.profileSampleType' + ), + '$.sourceConfig.config.profileSample' +) +WHERE pipelineType = 'profiler' + AND (JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.profileSample') + OR JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.profileSampleType') + OR JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.samplingMethodType')); + -- RDF distributed indexing state tables CREATE TABLE IF NOT EXISTS rdf_index_job ( id VARCHAR(36) NOT NULL, diff --git a/bootstrap/sql/migrations/native/1.13.0/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/1.13.0/postgres/schemaChanges.sql index df571d6ef05..cfd3d9f74be 100644 --- a/bootstrap/sql/migrations/native/1.13.0/postgres/schemaChanges.sql +++ b/bootstrap/sql/migrations/native/1.13.0/postgres/schemaChanges.sql @@ -151,6 +151,83 @@ WHERE ue.name = 'mcpapplicationbot' AND re.name = 'ApplicationBotImpersonationRole' ON CONFLICT DO NOTHING; +-- Migrate profiler sampling config: move flat profileSample/profileSampleType/samplingMethodType +-- into the new profileSampleConfig structure. Default to STATIC since DYNAMIC is new. 
+ +-- Profiler configs are stored in entity_extension table, not in entity json columns. +-- Extension keys: table.tableProfilerConfig, database.databaseProfilerConfig, databaseSchema.databaseSchemaProfilerConfig +-- The json column in entity_extension contains the config object directly (flat root-level fields). + +-- entity_extension: build profileSampleConfig from existing flat fields (skip if already migrated) +UPDATE entity_extension +SET json = jsonb_set( + json::jsonb, + '{profileSampleConfig}', + jsonb_build_object( + 'sampleConfigType', 'STATIC', + 'config', jsonb_build_object( + 'profileSample', json::jsonb #> '{profileSample}', + 'profileSampleType', COALESCE( + json::jsonb #> '{profileSampleType}', + '"PERCENTAGE"'::jsonb + ), + 'samplingMethodType', json::jsonb #> '{samplingMethodType}' + ) + ) +)::json +WHERE extension IN ( + 'table.tableProfilerConfig', + 'database.databaseProfilerConfig', + 'databaseSchema.databaseSchemaProfilerConfig' +) + AND json::jsonb #>> '{profileSample}' IS NOT NULL + AND json::jsonb #> '{profileSampleConfig}' IS NULL; + +-- entity_extension: remove old flat fields (key-existence test via #> so keys holding JSON null are removed too, matching the MySQL JSON_CONTAINS_PATH check) +UPDATE entity_extension +SET json = (json::jsonb #- '{profileSample}' + #- '{profileSampleType}' + #- '{samplingMethodType}')::json +WHERE extension IN ( + 'table.tableProfilerConfig', + 'database.databaseProfilerConfig', + 'databaseSchema.databaseSchemaProfilerConfig' +) + AND (json::jsonb #> '{profileSample}' IS NOT NULL + OR json::jsonb #> '{profileSampleType}' IS NOT NULL + OR json::jsonb #> '{samplingMethodType}' IS NOT NULL); + +-- ingestion_pipeline_entity (profiler pipelines): build profileSampleConfig (skip if already migrated) +UPDATE ingestion_pipeline_entity +SET json = jsonb_set( + json::jsonb, + '{sourceConfig,config,profileSampleConfig}', + jsonb_build_object( + 'sampleConfigType', 'STATIC', + 'config', jsonb_build_object( + 'profileSample', json::jsonb #> '{sourceConfig,config,profileSample}', + 'profileSampleType', COALESCE( + json::jsonb #> 
'{sourceConfig,config,profileSampleType}', + '"PERCENTAGE"'::jsonb + ), + 'samplingMethodType', json::jsonb #> '{sourceConfig,config,samplingMethodType}' + ) + ) +)::json +WHERE json #>> '{pipelineType}' = 'profiler' + AND json::jsonb #>> '{sourceConfig,config,profileSample}' IS NOT NULL + AND json::jsonb #> '{sourceConfig,config,profileSampleConfig}' IS NULL; + +-- ingestion_pipeline_entity (profiler pipelines): remove old flat fields (key-existence test via #> so keys holding JSON null are removed too, matching the MySQL JSON_CONTAINS_PATH check) +UPDATE ingestion_pipeline_entity +SET json = (json::jsonb #- '{sourceConfig,config,profileSample}' + #- '{sourceConfig,config,profileSampleType}' + #- '{sourceConfig,config,samplingMethodType}')::json +WHERE json #>> '{pipelineType}' = 'profiler' + AND (json::jsonb #> '{sourceConfig,config,profileSample}' IS NOT NULL + OR json::jsonb #> '{sourceConfig,config,profileSampleType}' IS NOT NULL + OR json::jsonb #> '{sourceConfig,config,samplingMethodType}' IS NOT NULL); + -- RDF distributed indexing state tables CREATE TABLE IF NOT EXISTS rdf_index_job ( id VARCHAR(36) NOT NULL, diff --git a/ingestion/src/metadata/data_quality/runner/base_test_suite_source.py b/ingestion/src/metadata/data_quality/runner/base_test_suite_source.py index 869ff2c0baf..7b4c8a5dded 100644 --- a/ingestion/src/metadata/data_quality/runner/base_test_suite_source.py +++ b/ingestion/src/metadata/data_quality/runner/base_test_suite_source.py @@ -32,7 +32,12 @@ from metadata.generated.schema.metadataIngestion.workflow import ( ) from metadata.generated.schema.type.entityReference import EntityReference from metadata.ingestion.ometa.ometa_api import OpenMetadata -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.sampler_interface import SamplerInterface from metadata.utils.bigquery_utils import copy_service_config from metadata.utils.profiler_utils import get_context_entities @@ -126,9 +131,16 @@ class 
BaseTestSuiteRunner: schema_entity=schema_entity, database_entity=database_entity, default_sample_config=SampleConfig( - profileSample=self.source_config.profileSample, - profileSampleType=self.source_config.profileSampleType, - samplingMethodType=self.source_config.samplingMethodType, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=self.source_config.profileSample, + profileSampleType=self.source_config.profileSampleType, + samplingMethodType=self.source_config.samplingMethodType, + ), + ) + if self.source_config.profileSample + else None, ), ) diff --git a/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py b/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py index 804ae783a2f..1e922826579 100644 --- a/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py +++ b/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py @@ -38,7 +38,7 @@ from metadata.data_quality.validations.models import ( TableDiffRuntimeParameters, TableParameter, ) -from metadata.generated.schema.entity.data.table import Column, ProfileSampleType +from metadata.generated.schema.entity.data.table import Column from metadata.generated.schema.entity.services.connections.database.sapHanaConnection import ( SapHanaScheme, ) @@ -50,6 +50,7 @@ from metadata.generated.schema.tests.basic import ( TestCaseStatus, TestResultValue, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.profiler.metrics.registry import Metrics from metadata.profiler.orm.converter.base import build_orm_col from metadata.profiler.orm.functions.md5 import MD5 @@ -465,16 +466,19 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin): on Table 1 and the hash will ensure that the same row is selected on Table 2. 
We want to avoid selecting rows with different ids because the comparison will not be sensible. """ - if ( - # no sample configuration - self.runtime_params.table_profile_config is None - or self.runtime_params.table_profile_config.profileSample is None - # sample is 100% or in other words no sample is required - or ( - self.runtime_params.table_profile_config.profileSampleType - == ProfileSampleType.PERCENTAGE - and self.runtime_params.table_profile_config.profileSample == 100 - ) + config = self.runtime_params.table_profile_config + if config is None: + return None, None + profile_sample_config = config.profileSampleConfig if config else None + sample_config = profile_sample_config.root if profile_sample_config else None + static = sample_config.config if sample_config else None + profile_sample = getattr(static, "profileSample", None) if static else None + profile_sample_type = ( + getattr(static, "profileSampleType", None) if static else None + ) + if profile_sample is None or ( + profile_sample_type == ProfileSampleType.PERCENTAGE + and profile_sample == 100 ): return None, None if DatabaseServiceType.Mssql in [ @@ -520,26 +524,19 @@ class TableDiffValidator(BaseTestValidator, SQAValidatorMixin): def calculate_nounce(self, max_nounce=2**32 - 1) -> int: """Calculate the nounce based on the profile sample configuration. 
The nounce is the sample fraction projected to a number on a scale of 0 to max_nounce""" - if ( - self.runtime_params.table_profile_config.profileSampleType - == ProfileSampleType.PERCENTAGE - ): - return int( - max_nounce - * self.runtime_params.table_profile_config.profileSample - / 100 - ) - if ( - self.runtime_params.table_profile_config.profileSampleType - == ProfileSampleType.ROWS - ): + config = self.runtime_params.table_profile_config + profile_sample_config = config.profileSampleConfig if config else None + sample_config = profile_sample_config.root if profile_sample_config else None + static = sample_config.config if sample_config else None + profile_sample = getattr(static, "profileSample", 100) + profile_sample_type = getattr(static, "profileSampleType", None) + if profile_sample_type == ProfileSampleType.PERCENTAGE: + return int(max_nounce * profile_sample / 100) + if profile_sample_type == ProfileSampleType.ROWS: row_count = self.get_total_row_count() if row_count is None: raise ValueError("Row count is required for ROWS profile sample type") - return int( - max_nounce - * (self.runtime_params.table_profile_config.profileSample / row_count) - ) + return int(max_nounce * (profile_sample / row_count)) raise ValueError("Invalid profile sample type") def get_row_diff_test_case_result( diff --git a/ingestion/src/metadata/mixins/pandas/pandas_mixin.py b/ingestion/src/metadata/mixins/pandas/pandas_mixin.py index 1a068a00628..817405d673c 100644 --- a/ingestion/src/metadata/mixins/pandas/pandas_mixin.py +++ b/ingestion/src/metadata/mixins/pandas/pandas_mixin.py @@ -147,22 +147,23 @@ class PandasInterfaceMixin: def yield_sampled_dfs(): dfs = raw_dataset - if sample_config.profileSampleType == ProfileSampleType.PERCENTAGE: + static = sample_config.get_static_config() + if static and static.profileSampleType == ProfileSampleType.PERCENTAGE: # Sampling based on percentage of rows will be applied to each dataframe chunk # to ensure consistent efficiency across 
large dataset. Other option would be to # either concatenate all dataframes (may cause OOM) or perform 2 passes (one to count rows, # another to sample) which would be less efficient. try: - percentage = sample_config.profileSample or 100 + percentage = static.profileSample or 100 for df in dfs(): yield df.sample(frac=percentage / 100) except Exception as exc: logger.error( - f"Error sampling dataframes based on percentage {sample_config.profileSample}: {exc}" + f"Error sampling dataframes based on percentage {static.profileSample}: {exc}" ) - elif sample_config.profileSampleType == ProfileSampleType.ROWS: + elif static and static.profileSampleType == ProfileSampleType.ROWS: try: - rows = sample_config.profileSample or 0 + rows = static.profileSample or 0 streamed_rows = 0 for df in dfs(): n = len(df) @@ -174,7 +175,7 @@ class PandasInterfaceMixin: break except Exception as exc: logger.error( - f"Error sampling dataframes based on rows {sample_config.profileSample}: {exc}" + f"Error sampling dataframes based on rows {static.profileSample}: {exc}" ) else: logger.warning( diff --git a/ingestion/src/metadata/profiler/processor/core.py b/ingestion/src/metadata/profiler/processor/core.py index 3c96d663431..21ff97020c8 100644 --- a/ingestion/src/metadata/profiler/processor/core.py +++ b/ingestion/src/metadata/profiler/processor/core.py @@ -548,13 +548,15 @@ class Profiler(Generic[TMetric]): createDateTime=raw_create_date, sizeInByte=self._table_results.get("sizeInBytes"), profileSample=( - self.profiler_interface.sampler.sample_config.profileSample + self.profiler_interface.sampler.sample_config.get_static_config().profileSample if self.profiler_interface.sampler.sample_config + and self.profiler_interface.sampler.sample_config.get_static_config() else None ), profileSampleType=( - self.profiler_interface.sampler.sample_config.profileSampleType + self.profiler_interface.sampler.sample_config.get_static_config().profileSampleType if 
self.profiler_interface.sampler.sample_config + and self.profiler_interface.sampler.sample_config.get_static_config() else None ), customMetrics=self._table_results.get("customMetrics"), diff --git a/ingestion/src/metadata/profiler/source/database/base/profiler_source.py b/ingestion/src/metadata/profiler/source/database/base/profiler_source.py index ddb2114a77a..1433a8c2570 100644 --- a/ingestion/src/metadata/profiler/source/database/base/profiler_source.py +++ b/ingestion/src/metadata/profiler/source/database/base/profiler_source.py @@ -44,7 +44,7 @@ from metadata.sampler.config import ( get_exclude_columns, get_include_columns, ) -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ProfileSampleConfig, SampleConfig from metadata.sampler.sampler_interface import SamplerInterface from metadata.utils.dependency_injector.dependency_injector import ( DependencyNotFoundError, @@ -141,6 +141,19 @@ class ProfilerSource(ProfilerSourceInterface): return config_copy + def _build_default_sample_config(self) -> SampleConfig: + """Build a SampleConfig from the pipeline's profileSampleConfig.""" + profile_sample_config = None + raw = self.source_config.profileSampleConfig if self.source_config else None + if raw: + profile_sample_config = ProfileSampleConfig.model_validate(raw.model_dump()) + return SampleConfig( + profileSampleConfig=profile_sample_config, + randomizedSample=self.source_config.randomizedSample + if self.source_config + else False, + ) + @inject def create_profiler_interface( self, @@ -177,12 +190,7 @@ class ProfilerSource(ProfilerSourceInterface): schema_entity=schema_entity, database_entity=database_entity, table_config=config, - default_sample_config=SampleConfig( - profileSample=self.source_config.profileSample, - profileSampleType=self.source_config.profileSampleType, - samplingMethodType=self.source_config.samplingMethodType, - randomizedSample=self.source_config.randomizedSample, - ), + 
default_sample_config=self._build_default_sample_config(), # TODO: Change this when we have the processing engine configuration implemented. Right now it does nothing. processing_engine=self.get_processing_engine(self.source_config), ) diff --git a/ingestion/src/metadata/sampler/config.py b/ingestion/src/metadata/sampler/config.py index 33bf033f1da..2ed1d9aef0a 100644 --- a/ingestion/src/metadata/sampler/config.py +++ b/ingestion/src/metadata/sampler/config.py @@ -31,7 +31,14 @@ from metadata.profiler.config import ( get_database_profiler_config, get_schema_profiler_config, ) -from metadata.sampler.models import DatabaseAndSchemaConfig, SampleConfig, TableConfig +from metadata.sampler.models import ( + DatabaseAndSchemaConfig, + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, + TableConfig, +) def get_sample_storage_config( @@ -96,6 +103,55 @@ def get_storage_config_for_table( return None +def _resolve_profile_sample_config( + entity_config: Optional[Union[TableConfig, DatabaseAndSchemaConfig]], + table_profiler_config, + schema_profiler_config, + database_profiler_config, + default_sample_config: Optional[SampleConfig], +) -> Optional[ProfileSampleConfig]: + """Resolve profileSampleConfig through the config hierarchy. + + Checks profileSampleConfig first, then falls back to flat profileSample + fields on manual config models (TableConfig, DatabaseAndSchemaConfig). 
+ """ + for config in ( + entity_config, + table_profiler_config, + schema_profiler_config, + database_profiler_config, + default_sample_config, + ): + if not config: + continue + try: + psc = config.profileSampleConfig + if psc: + unwrapped = psc.root if hasattr(psc, "root") else psc + if isinstance(unwrapped, ProfileSampleConfig): + return unwrapped + return ProfileSampleConfig.model_validate( + unwrapped.model_dump() + if hasattr(unwrapped, "model_dump") + else unwrapped + ) + except AttributeError: + pass + try: + if config.profileSample: + return ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=config.profileSample, + profileSampleType=config.profileSampleType, + samplingMethodType=config.samplingMethodType, + ), + ) + except AttributeError: + pass + return None + + def get_profile_sample_config( entity: Table, schema_entity: Optional[DatabaseSchema], @@ -109,25 +165,15 @@ def get_profile_sample_config( database_entity=database_entity ) - for config in ( - entity_config, - entity.tableProfilerConfig, - schema_profiler_config, - database_profiler_config, - default_sample_config, - ): - try: - if config and config.profileSample: - return SampleConfig( - profileSample=config.profileSample, - profileSampleType=config.profileSampleType, - samplingMethodType=config.samplingMethodType, - randomizedSample=config.randomizedSample, - ) - except AttributeError: - pass + profile_sample_config = _resolve_profile_sample_config( + entity_config=entity_config, + table_profiler_config=entity.tableProfilerConfig, + schema_profiler_config=schema_profiler_config, + database_profiler_config=database_profiler_config, + default_sample_config=default_sample_config, + ) - return SampleConfig() + return SampleConfig(profileSampleConfig=profile_sample_config) def get_sample_query( diff --git a/ingestion/src/metadata/sampler/models.py b/ingestion/src/metadata/sampler/models.py index 490a3543472..464f29ac05b 100644 --- 
a/ingestion/src/metadata/sampler/models.py +++ b/ingestion/src/metadata/sampler/models.py @@ -11,30 +11,76 @@ """ Sampling Models """ - +from enum import Enum from typing import Any, List, Optional, Union -from pydantic import Field, model_validator +from pydantic import Field, field_validator, model_validator from typing_extensions import Annotated from metadata.config.common import ConfigModel from metadata.generated.schema.entity.data.table import ( ColumnProfilerConfig, PartitionProfilerConfig, - ProfileSampleType, - SamplingMethodType, Table, TableData, ) from metadata.generated.schema.entity.services.connections.connectionBasicType import ( SampleDataStorageConfig, ) -from metadata.generated.schema.type.basic import FullyQualifiedEntityName +from metadata.generated.schema.type.basic import ( + FullyQualifiedEntityName, + ProfileSampleType, + SamplingMethodType, +) from metadata.ingestion.models.custom_pydantic import BaseModel from metadata.ingestion.models.table_metadata import ColumnTag from metadata.pii.types import ClassifiableEntityType +class ProfileSampleConfigType(str, Enum): + STATIC = "STATIC" + DYNAMIC = "DYNAMIC" + + +class DynamicSamplingThreshold(ConfigModel): + """Single threshold entry for dynamic sampling""" + + rowCountThreshold: int + profileSample: Union[float, int] + profileSampleType: Optional[ProfileSampleType] = ProfileSampleType.PERCENTAGE + samplingMethodType: Optional[SamplingMethodType] = None + + +class DynamicSamplingConfig(ConfigModel): + """Configuration for dynamic sampling with row-count-based thresholds""" + + thresholds: Optional[List[DynamicSamplingThreshold]] = None + + @field_validator("thresholds") + @classmethod + def sort_thresholds_descending( + cls, v: Optional[List[DynamicSamplingThreshold]] + ) -> Optional[List[DynamicSamplingThreshold]]: + if v is not None: + return sorted(v, key=lambda t: t.rowCountThreshold, reverse=True) + return v + + +class StaticSamplingConfig(ConfigModel): + """Configuration for static 
sampling""" + + profileSample: Optional[Union[float, int]] = None + profileSampleType: Optional[ProfileSampleType] = ProfileSampleType.PERCENTAGE + samplingMethodType: Optional[SamplingMethodType] = None + + +class ProfileSampleConfig(ConfigModel): + """Profile sample configuration supporting static and dynamic sampling""" + + sampleConfigType: ProfileSampleConfigType = ProfileSampleConfigType.STATIC + config: Optional[Union[DynamicSamplingConfig, StaticSamplingConfig]] = None + + class BaseProfileConfig(ConfigModel): """base profile config""" @@ -43,7 +89,8 @@ class BaseProfileConfig(ConfigModel): profileSampleType: Optional[ProfileSampleType] = None samplingMethodType: Optional[SamplingMethodType] = None sampleDataCount: Optional[int] = 100 - randomizedSample: Optional[bool] = False + randomizedSample: Optional[bool] = True + profileSampleConfig: Optional[ProfileSampleConfig] = None class ColumnConfig(ConfigModel): @@ -71,6 +118,7 @@ class TableConfig(BaseProfileConfig): profileSampleType=config.profileSampleType, sampleDataCount=config.sampleDataCount, samplingMethodType=config.samplingMethodType, + profileSampleConfig=config.profileSampleConfig, ) return table_config @@ -125,7 +173,13 @@ class SamplerResponse(ConfigModel): class SampleConfig(ConfigModel): """Profile Sample Config""" - profileSample: Optional[Union[float, int]] = None - profileSampleType: Optional[ProfileSampleType] = ProfileSampleType.PERCENTAGE - samplingMethodType: Optional[SamplingMethodType] = None - randomizedSample: Optional[bool] = False + profileSampleConfig: Optional[ProfileSampleConfig] = None + randomizedSample: Optional[bool] = True + + def get_static_config(self) -> Optional[StaticSamplingConfig]: + """Extract the StaticSamplingConfig from profileSampleConfig, or None.""" + if self.profileSampleConfig and self.profileSampleConfig.config: + cfg = self.profileSampleConfig.config + if isinstance(cfg, StaticSamplingConfig): + return cfg + return None diff --git 
a/ingestion/src/metadata/sampler/nosql/sampler.py b/ingestion/src/metadata/sampler/nosql/sampler.py index b3cbfd2342d..8ec468287cb 100644 --- a/ingestion/src/metadata/sampler/nosql/sampler.py +++ b/ingestion/src/metadata/sampler/nosql/sampler.py @@ -11,7 +11,8 @@ """NoSQL Sampler""" from typing import Dict, List, Optional, Tuple -from metadata.generated.schema.entity.data.table import ProfileSampleType, TableData +from metadata.generated.schema.entity.data.table import TableData +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.profiler.adaptors.factory import factory from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor from metadata.sampler.sampler_interface import SamplerInterface @@ -87,10 +88,11 @@ class NoSQLSampler(SamplerInterface): def _get_limit(self) -> Optional[int]: num_rows = self.client.item_count(self.raw_dataset) - if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE: - limit = num_rows * (self.sample_config.profileSample or 100 / 100) - elif self.sample_config.profileSampleType == ProfileSampleType.ROWS: - limit = self.sample_config.profileSample + static = self.sample_config.get_static_config() + if static and static.profileSampleType == ProfileSampleType.PERCENTAGE: + limit = num_rows * (static.profileSample or 100) / 100  # percentage -> fraction of rows; parenthesised so the division applies to the sample, not only to the 100 default + elif static and static.profileSampleType == ProfileSampleType.ROWS: + limit = static.profileSample else: limit = SAMPLE_DATA_DEFAULT_COUNT return limit diff --git a/ingestion/src/metadata/sampler/pandas/burstiq/sampler.py b/ingestion/src/metadata/sampler/pandas/burstiq/sampler.py index 75ba8a83fc2..735e0362717 100644 --- a/ingestion/src/metadata/sampler/pandas/burstiq/sampler.py +++ b/ingestion/src/metadata/sampler/pandas/burstiq/sampler.py @@ -20,11 +20,8 @@ from typing import Callable, Iterator, List, Optional import pandas as pd -from metadata.generated.schema.entity.data.table import ( - DataType, - ProfileSampleType, - TableData, -) +from 
metadata.generated.schema.entity.data.table import DataType, TableData +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.ingestion.source.database.burstiq.client import BurstIQClient from metadata.sampler.sampler_interface import SamplerInterface from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT @@ -81,8 +78,9 @@ class BurstIQSampler(SamplerInterface): return self._cached_frames chain = self.entity.name.root - sample = self.sample_config.profileSample - sample_type = self.sample_config.profileSampleType + static = self.sample_config.get_static_config() + sample = static.profileSample if static else None + sample_type = static.profileSampleType if static else None if sample and sample_type == ProfileSampleType.ROWS: total_limit: Optional[int] = int(sample) diff --git a/ingestion/src/metadata/sampler/pandas/sampler.py b/ingestion/src/metadata/sampler/pandas/sampler.py index 9c15848dc64..62dbffa6fe6 100644 --- a/ingestion/src/metadata/sampler/pandas/sampler.py +++ b/ingestion/src/metadata/sampler/pandas/sampler.py @@ -16,9 +16,9 @@ from typing import Callable, List, Optional, cast from metadata.generated.schema.entity.data.table import ( PartitionProfilerConfig, - ProfileSampleType, TableData, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.mixins.pandas.pandas_mixin import PandasInterfaceMixin from metadata.sampler.sampler_interface import SamplerInterface from metadata.utils.datalake.datalake_utils import GenericDataFrameColumnParser @@ -107,13 +107,15 @@ class DatalakeSampler(SamplerInterface, PandasInterfaceMixin): if self.partition_details: raw_dataset = self._partitioned_table() - if not self.sample_config.profileSample: - return raw_dataset - + static = self.sample_config.get_static_config() if ( - self.sample_config.profileSample == 100 - and self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE - and self.sample_config.randomizedSample is not True + not static + or 
not static.profileSample + or ( + static.profileSample == 100 + and static.profileSampleType == ProfileSampleType.PERCENTAGE + and self.sample_config.randomizedSample is not True + ) ): return raw_dataset return self.get_sampled_dataframe(raw_dataset, self.sample_config) diff --git a/ingestion/src/metadata/sampler/sqlalchemy/azuresql/sampler.py b/ingestion/src/metadata/sampler/sqlalchemy/azuresql/sampler.py index 83a3c4062e9..d184ab420fa 100644 --- a/ingestion/src/metadata/sampler/sqlalchemy/azuresql/sampler.py +++ b/ingestion/src/metadata/sampler/sqlalchemy/azuresql/sampler.py @@ -37,14 +37,15 @@ class AzureSQLSampler(SQASampler): Args: selectable (Table): _description_ """ + static = self.sample_config.get_static_config() if self.entity.tableType != TableType.View: - if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE: + if static and static.profileSampleType == ProfileSampleType.PERCENTAGE: return selectable.tablesample( - text(f"{self.sample_config.profileSample or 100} PERCENT") + text(f"{static.profileSample or 100} PERCENT") ) return selectable.tablesample( - text(f"{int(self.sample_config.profileSample or 100)} ROWS") + text(f"{int(static.profileSample or 100 if static else 100)} ROWS") ) return selectable diff --git a/ingestion/src/metadata/sampler/sqlalchemy/bigquery/sampler.py b/ingestion/src/metadata/sampler/sqlalchemy/bigquery/sampler.py index 2427285d96b..bb0ff30feb6 100644 --- a/ingestion/src/metadata/sampler/sqlalchemy/bigquery/sampler.py +++ b/ingestion/src/metadata/sampler/sqlalchemy/bigquery/sampler.py @@ -20,11 +20,7 @@ from sqlalchemy import Table as SqaTable from sqlalchemy import text from sqlalchemy.orm import Query -from metadata.generated.schema.entity.data.table import ( - ProfileSampleType, - Table, - TableType, -) +from metadata.generated.schema.entity.data.table import Table, TableType from metadata.generated.schema.entity.services.connections.connectionBasicType import ( DataStorageConfig, ) @@ -33,6 +29,7 @@ from 
metadata.generated.schema.entity.services.connections.database.datalakeConn ) from metadata.generated.schema.entity.services.databaseService import DatabaseConnection from metadata.generated.schema.security.credentials.gcpValues import SingleProjectId +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.ingestion.connections.session import create_and_bind_thread_safe_session from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.sampler.models import SampleConfig @@ -96,12 +93,14 @@ class BigQuerySampler(SQASampler): Args: selectable (Table): Table object """ + static = self.sample_config.get_static_config() if ( - self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE + static + and static.profileSampleType == ProfileSampleType.PERCENTAGE and self.raw_dataset_type != TableType.View ): return selectable.tablesample( - text(f"{self.sample_config.profileSample or 100} PERCENT") + text(f"{static.profileSample or 100} PERCENT") ) return selectable @@ -136,8 +135,10 @@ class BigQuerySampler(SQASampler): def get_sample_query(self, *, column=None) -> Query: """get query for sample data""" # TABLESAMPLE SYSTEM is not supported for views + static = self.sample_config.get_static_config() if ( - self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE + static + and static.profileSampleType == ProfileSampleType.PERCENTAGE and self.raw_dataset_type != TableType.View ): return self._base_sample_query(column).cte( diff --git a/ingestion/src/metadata/sampler/sqlalchemy/mssql/sampler.py b/ingestion/src/metadata/sampler/sqlalchemy/mssql/sampler.py index 349ce36fee8..be5bc70ab32 100644 --- a/ingestion/src/metadata/sampler/sqlalchemy/mssql/sampler.py +++ b/ingestion/src/metadata/sampler/sqlalchemy/mssql/sampler.py @@ -17,7 +17,8 @@ for the profiler from sqlalchemy import Table, text from sqlalchemy.sql.selectable import CTE -from metadata.generated.schema.entity.data.table import ProfileSampleType, TableType 
+from metadata.generated.schema.entity.data.table import TableType +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.sampler.sqlalchemy.sampler import SQASampler @@ -32,14 +33,15 @@ class MssqlSampler(SQASampler): Args: selectable (Table): _description_ """ + static = self.sample_config.get_static_config() if self.entity.tableType != TableType.View: - if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE: + if static and static.profileSampleType == ProfileSampleType.PERCENTAGE: return selectable.tablesample( - text(f"{self.sample_config.profileSample or 100} PERCENT") + text(f"{static.profileSample or 100} PERCENT") ) return selectable.tablesample( - text(f"{int(self.sample_config.profileSample or 100)} ROWS") + text(f"{int(static.profileSample or 100 if static else 100)} ROWS") ) return selectable diff --git a/ingestion/src/metadata/sampler/sqlalchemy/postgres/sampler.py b/ingestion/src/metadata/sampler/sqlalchemy/postgres/sampler.py index cf191e58175..ea225b9b297 100644 --- a/ingestion/src/metadata/sampler/sqlalchemy/postgres/sampler.py +++ b/ingestion/src/metadata/sampler/sqlalchemy/postgres/sampler.py @@ -17,7 +17,7 @@ from sqlalchemy import Table as SqaTable from sqlalchemy import func from sqlalchemy.orm import Query -from metadata.generated.schema.entity.data.table import ProfileSampleType, Table +from metadata.generated.schema.entity.data.table import Table from metadata.generated.schema.entity.services.connections.connectionBasicType import ( DataStorageConfig, ) @@ -25,6 +25,7 @@ from metadata.generated.schema.entity.services.connections.database.datalakeConn DatalakeConnection, ) from metadata.generated.schema.entity.services.databaseService import DatabaseConnection +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.sampler.models import SampleConfig from metadata.sampler.sqlalchemy.sampler import SQASampler @@ 
-64,26 +65,25 @@ class PostgresSampler(SQASampler): ) self.sampling_fn = func.bernoulli self.sampling_method_type = SamplingMethodType.BERNOULLI - if ( - sample_config - and sample_config.samplingMethodType == SamplingMethodType.SYSTEM - ): - self.sampling_fn = func.system + if sample_config: + static = sample_config.get_static_config() + if static and static.samplingMethodType == SamplingMethodType.SYSTEM: + self.sampling_fn = func.system def set_tablesample(self, selectable: SqaTable): """Set the TABLESAMPLE clause for postgres Args: selectable (Table): _description_ """ - if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE: - return selectable.tablesample( - self.sampling_fn(self.sample_config.profileSample or 100) - ) + static = self.sample_config.get_static_config() + if static and static.profileSampleType == ProfileSampleType.PERCENTAGE: + return selectable.tablesample(self.sampling_fn(static.profileSample or 100)) return selectable def get_sample_query(self, *, column=None) -> Query: - if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE: + static = self.sample_config.get_static_config() + if static and static.profileSampleType == ProfileSampleType.PERCENTAGE: return self._base_sample_query(column).cte( f"{self.get_sampler_table_name()}_rnd" ) diff --git a/ingestion/src/metadata/sampler/sqlalchemy/sampler.py b/ingestion/src/metadata/sampler/sqlalchemy/sampler.py index 9af4ad7cc3a..d64cc8b3596 100644 --- a/ingestion/src/metadata/sampler/sqlalchemy/sampler.py +++ b/ingestion/src/metadata/sampler/sqlalchemy/sampler.py @@ -23,9 +23,9 @@ from sqlalchemy.sql.sqltypes import Enum from metadata.generated.schema.entity.data.table import ( PartitionProfilerConfig, - ProfileSampleType, TableData, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.ingestion.connections.session import create_and_bind_thread_safe_session from metadata.mixins.sqalchemy.sqa_mixin import SQAInterfaceMixin from 
metadata.profiler.orm.functions.modulo import ModuloFn @@ -157,19 +157,23 @@ class SQASampler(SamplerInterface, SQAInterfaceMixin): def get_sample_query(self, *, column=None) -> Query: """get query for sample data""" + static = self.sample_config.get_static_config() with self.session_factory() as client: - if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE: + if static and static.profileSampleType == ProfileSampleType.PERCENTAGE: rnd = self._base_sample_query( column, (ModuloFn(RandomNumFn(), 100)).label(RANDOM_LABEL), ).cte(f"{self.get_sampler_table_name()}_rnd") session_query = client.query(rnd) - query = session_query.where( - rnd.c.random <= self.sample_config.profileSample + session_query = session_query.where( + rnd.c.random <= static.profileSample ) - if self.sample_config.randomizedSample is True: - query = query.order_by(rnd.c.random) - return query.cte(f"{self.get_sampler_table_name()}_sample") + if ( + static.profileSample == 100 + and self.sample_config.randomizedSample is True + ): + session_query = session_query.order_by(rnd.c.random) + return session_query.cte(f"{self.get_sampler_table_name()}_sample") table_query = client.query(self.raw_dataset) if self.partition_details: @@ -185,7 +189,7 @@ class SQASampler(SamplerInterface, SQAInterfaceMixin): if self.sample_config.randomizedSample is True else session_query ) - return query.limit(self.sample_config.profileSample).cte( + return query.limit(static.profileSample if static else None).cte( f"{self.get_sampler_table_name()}_rnd" ) @@ -197,16 +201,15 @@ class SQASampler(SamplerInterface, SQAInterfaceMixin): if self.sample_query: return self._rdn_sample_from_user_query() - if not self.sample_config.profileSample: - if self.partition_details: - return self._partitioned_table() - - return self.raw_dataset - + static = self.sample_config.get_static_config() if ( - self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE - and self.sample_config.profileSample == 100 - and 
self.sample_config.randomizedSample is not True + not static + or not static.profileSample + or ( + static.profileSampleType == ProfileSampleType.PERCENTAGE + and static.profileSample == 100 + and self.sample_config.randomizedSample is not True + ) ): if self.partition_details: return self._partitioned_table() diff --git a/ingestion/src/metadata/sampler/sqlalchemy/snowflake/sampler.py b/ingestion/src/metadata/sampler/sqlalchemy/snowflake/sampler.py index cfd6a44dddc..33b72acc719 100644 --- a/ingestion/src/metadata/sampler/sqlalchemy/snowflake/sampler.py +++ b/ingestion/src/metadata/sampler/sqlalchemy/snowflake/sampler.py @@ -18,10 +18,6 @@ from typing import Dict, Optional, Union from sqlalchemy import Table, func, text from sqlalchemy.sql.selectable import CTE -from metadata.generated.schema.entity.data.table import ( - ProfileSampleType, - SamplingMethodType, -) from metadata.generated.schema.entity.services.connections.connectionBasicType import ( DataStorageConfig, ) @@ -29,6 +25,7 @@ from metadata.generated.schema.entity.services.connections.database.datalakeConn DatalakeConnection, ) from metadata.generated.schema.entity.services.databaseService import DatabaseConnection +from metadata.generated.schema.type.basic import ProfileSampleType, SamplingMethodType from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.sampler.models import SampleConfig from metadata.sampler.sqlalchemy.sampler import SQASampler @@ -66,24 +63,24 @@ class SnowflakeSampler(SQASampler): **kwargs, ) self.sampling_method_type = func.bernoulli - if ( - sample_config - and sample_config.samplingMethodType == SamplingMethodType.SYSTEM - ): - self.sampling_method_type = func.system + if sample_config: + static = sample_config.get_static_config() + if static and static.samplingMethodType == SamplingMethodType.SYSTEM: + self.sampling_method_type = func.system def set_tablesample(self, selectable: Table): """Set the TABLESAMPLE clause for Snowflake Args: selectable (Table): 
_description_ """ - if self.sample_config.profileSampleType == ProfileSampleType.PERCENTAGE: + static = self.sample_config.get_static_config() + if static and static.profileSampleType == ProfileSampleType.PERCENTAGE: return selectable.tablesample( - self.sampling_method_type(self.sample_config.profileSample or 100) + self.sampling_method_type(static.profileSample or 100) ) return selectable.tablesample( - func.ROW(text(f"{self.sample_config.profileSample or 100} ROWS")) + func.ROW(text(f"{static.profileSample or 100 if static else 100} ROWS")) ) def get_sample_query(self, *, column=None) -> CTE: diff --git a/ingestion/tests/cli_e2e/test_cli_bigquery.py b/ingestion/tests/cli_e2e/test_cli_bigquery.py index 9fe067c5d45..c4cf494de0f 100644 --- a/ingestion/tests/cli_e2e/test_cli_bigquery.py +++ b/ingestion/tests/cli_e2e/test_cli_bigquery.py @@ -23,13 +23,12 @@ from metadata.data_quality.api.models import TestCaseDefinition from metadata.generated.schema.entity.data.table import ( ColumnProfile, DmlOperationType, - ProfileSampleType, SystemProfile, TableProfilerConfig, ) from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus from metadata.generated.schema.tests.testCase import TestCaseParameterValue -from metadata.generated.schema.type.basic import Timestamp +from metadata.generated.schema.type.basic import ProfileSampleType, Timestamp from .common.test_cli_db import CliCommonDB from .common_e2e_sqa_mixins import SQACommonMethods diff --git a/ingestion/tests/integration/data_quality/test_data_diff.py b/ingestion/tests/integration/data_quality/test_data_diff.py index 8529a69142a..60c0cf1d91c 100644 --- a/ingestion/tests/integration/data_quality/test_data_diff.py +++ b/ingestion/tests/integration/data_quality/test_data_diff.py @@ -15,11 +15,7 @@ from sqlalchemy.sql import sqltypes from _openmetadata_testutils.postgres.conftest import postgres_container from _openmetadata_testutils.pydantic.test_utils import assert_equal_pydantic_objects from 
metadata.data_quality.api.models import TestCaseDefinition -from metadata.generated.schema.entity.data.table import ( - ProfileSampleType, - Table, - TableProfilerConfig, -) +from metadata.generated.schema.entity.data.table import Table, TableProfilerConfig from metadata.generated.schema.entity.services.databaseService import DatabaseService from metadata.generated.schema.metadataIngestion.testSuitePipeline import ( TestSuiteConfigType, @@ -30,6 +26,7 @@ from metadata.generated.schema.tests.basic import ( TestResultValue, ) from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue +from metadata.generated.schema.type.samplingConfig import ProfileSampleConfig from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.workflow.data_quality import TestSuiteWorkflow @@ -93,8 +90,13 @@ class TestParameters(BaseModel): passedRows=IsApprox(59, delta=60) & IsPositiveInt, ), TableProfilerConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=10, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 10, + "profileSampleType": "PERCENTAGE", + }, + ), ), ), ( @@ -118,8 +120,13 @@ class TestParameters(BaseModel): passedRows=IsApprox(10, delta=15) & IsPositiveInt, ), TableProfilerConfig( - profileSampleType=ProfileSampleType.ROWS, - profileSample=10, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 10, + "profileSampleType": "ROWS", + }, + ), ), ), ( @@ -349,8 +356,13 @@ class TestParameters(BaseModel): testCaseStatus=TestCaseStatus.Success, ), TableProfilerConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=10, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 10, + "profileSampleType": "PERCENTAGE", + }, + ), ), ), ( diff --git a/ingestion/tests/integration/datalake/conftest.py b/ingestion/tests/integration/datalake/conftest.py index 
d80dd19512e..f4fef46b9e5 100644 --- a/ingestion/tests/integration/datalake/conftest.py +++ b/ingestion/tests/integration/datalake/conftest.py @@ -18,10 +18,10 @@ import pytest from metadata.generated.schema.entity.data.table import ( PartitionIntervalTypes, - ProfileSampleType, TableProfilerConfig, ) from metadata.generated.schema.entity.services.databaseService import DatabaseService +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.sampler.models import PartitionProfilerConfig from metadata.workflow.classification import AutoClassificationWorkflow from metadata.workflow.data_quality import TestSuiteWorkflow diff --git a/ingestion/tests/integration/integration_base.py b/ingestion/tests/integration/integration_base.py index 6d68db0c077..633b305bb0e 100644 --- a/ingestion/tests/integration/integration_base.py +++ b/ingestion/tests/integration/integration_base.py @@ -197,7 +197,7 @@ PROFILER_INGESTION_CONFIG_TEMPLATE = dedent( "serviceConnection": {{ "config": {service_config} }}, - "sourceConfig": {{"config": {{"type":"Profiler", "profileSample": 100}}}} + "sourceConfig": {{"config": {{"type":"Profiler", "profileSampleConfig": {{"sampleConfigType": "STATIC", "config": {{"profileSample": 100, "profileSampleType": "PERCENTAGE"}}}}}}}} }}, "processor": {{"type": "orm-profiler", "config": {{}}}}, "sink": {{"type": "metadata-rest", "config": {{}}}}, diff --git a/ingestion/tests/integration/ometa/test_ometa_table_api.py b/ingestion/tests/integration/ometa/test_ometa_table_api.py index 2c9687fb158..00ff576eec6 100644 --- a/ingestion/tests/integration/ometa/test_ometa_table_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_table_api.py @@ -53,6 +53,8 @@ from metadata.generated.schema.type.basic import ( ) from metadata.generated.schema.type.entityReference import EntityReference from metadata.generated.schema.type.entityReferenceList import EntityReferenceList +from metadata.generated.schema.type.samplingConfig import 
ProfileSampleConfig +from metadata.generated.schema.type.staticSamplingConfig import StaticSamplingConfig from metadata.generated.schema.type.usageRequest import UsageRequest from metadata.ingestion.ometa.client import REST @@ -447,7 +449,9 @@ class TestOMetaTableAPI: table = metadata.get_latest_table_profile(expected_fqn) - assert table.profile == table_profile + assert table.profile.timestamp == table_profile.timestamp + assert table.profile.columnCount == table_profile.columnCount + assert table.profile.rowCount == table_profile.rowCount res_column_profile = next( (col.profile for col in table.columns if col.name.root == "id") @@ -620,13 +624,21 @@ class TestOMetaTableAPI: assert table.tableProfilerConfig is None metadata._create_or_update_table_profiler_config( - table.id, table_profiler_config=TableProfilerConfig(profileSample=50.0) + table.id, + table_profiler_config=TableProfilerConfig( + profileSampleConfig=ProfileSampleConfig( + config=StaticSamplingConfig(profileSample=50.0) + ) + ), ) stored = metadata.get_by_name( entity=Table, fqn=table.fullyQualifiedName, fields=["tableProfilerConfig"] ) - assert stored.tableProfilerConfig.profileSample == 50.0 + assert ( + stored.tableProfilerConfig.profileSampleConfig.root.config.profileSample + == 50.0 + ) def test_list_w_skip_on_failure(self, metadata): """ diff --git a/ingestion/tests/integration/orm_profiler/test_orm_profiler_e2e.py b/ingestion/tests/integration/orm_profiler/test_orm_profiler_e2e.py index 5a5133516fe..48e6fc60072 100644 --- a/ingestion/tests/integration/orm_profiler/test_orm_profiler_e2e.py +++ b/ingestion/tests/integration/orm_profiler/test_orm_profiler_e2e.py @@ -25,11 +25,7 @@ import pytest from sqlalchemy import Column, DateTime, Integer, String, create_engine from sqlalchemy.orm import DeclarativeBase -from metadata.generated.schema.entity.data.table import ( - ColumnProfile, - ProfileSampleType, - Table, -) +from metadata.generated.schema.entity.data.table import ColumnProfile, Table from 
metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( OpenMetadataConnection, ) @@ -37,6 +33,7 @@ from metadata.generated.schema.entity.services.databaseService import DatabaseSe from metadata.generated.schema.security.client.openMetadataJWTClientConfig import ( OpenMetadataJWTClientConfig, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.ingestion.connections.session import create_and_bind_session from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.utils.time_utils import ( @@ -283,7 +280,7 @@ def test_profiler_workflow(ingest, metadata, service_name): assert not table.tableProfilerConfig assert profile.profileSample == 75.0 - assert profile.profileSampleType == ProfileSampleType.PERCENTAGE + assert profile.profileSampleType.root == ProfileSampleType.PERCENTAGE workflow_config["processor"]["config"]["tableConfig"][0][ "profileSampleType" @@ -307,7 +304,7 @@ def test_profiler_workflow(ingest, metadata, service_name): assert not table.tableProfilerConfig assert profile.profileSample == 3.0 assert profile.rowCount == 4.0 - assert profile.profileSampleType == ProfileSampleType.ROWS + assert profile.profileSampleType.root == ProfileSampleType.ROWS def test_workflow_sample_profile(ingest, metadata, service_name): @@ -316,7 +313,13 @@ def test_workflow_sample_profile(ingest, metadata, service_name): workflow_config["source"]["sourceConfig"]["config"].update( { "type": "Profiler", - "profileSample": 50, + "profileSampleConfig": { + "sampleConfigType": "STATIC", + "config": { + "profileSample": 50, + "profileSampleType": "PERCENTAGE", + }, + }, "tableFilterPattern": {"includes": ["newUsers"]}, } ) diff --git a/ingestion/tests/integration/test_suite/test_e2e_workflow.py b/ingestion/tests/integration/test_suite/test_e2e_workflow.py index 774a30bf9f6..152b4fe6a96 100644 --- a/ingestion/tests/integration/test_suite/test_e2e_workflow.py +++ 
b/ingestion/tests/integration/test_suite/test_e2e_workflow.py @@ -35,7 +35,6 @@ from metadata.generated.schema.entity.data.table import ( DataType, PartitionIntervalTypes, PartitionProfilerConfig, - ProfileSampleType, TableProfilerConfig, ) from metadata.generated.schema.entity.services.connections.database.sqliteConnection import ( @@ -51,6 +50,7 @@ from metadata.generated.schema.entity.services.databaseService import ( DatabaseServiceType, ) from metadata.generated.schema.tests.testCase import TestCase +from metadata.generated.schema.type.samplingConfig import ProfileSampleConfig from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.workflow.data_quality import TestSuiteWorkflow @@ -318,8 +318,13 @@ class TestE2EWorkflow(unittest.TestCase): self.metadata.create_or_update_table_profiler_config( fqn=fqn, table_profiler_config=TableProfilerConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=50.0, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 50.0, + "profileSampleType": "PERCENTAGE", + }, + ), ), ) @@ -382,8 +387,13 @@ class TestE2EWorkflow(unittest.TestCase): self.metadata.create_or_update_table_profiler_config( fqn=fqn, table_profiler_config=TableProfilerConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=100.0, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 100.0, + "profileSampleType": "PERCENTAGE", + }, + ), partitioning=PartitionProfilerConfig( enablePartitioning=True, partitionIntervalType=PartitionIntervalTypes.COLUMN_VALUE, diff --git a/ingestion/tests/unit/metadata/data_quality/test_data_diff.py b/ingestion/tests/unit/metadata/data_quality/test_data_diff.py index fa42732d27b..120df26555b 100644 --- a/ingestion/tests/unit/metadata/data_quality/test_data_diff.py +++ b/ingestion/tests/unit/metadata/data_quality/test_data_diff.py @@ -13,13 +13,14 @@ from 
metadata.data_quality.validations.table.sqlalchemy.tableDiff import ( from metadata.generated.schema.entity.data.table import ( Column, DataType, - ProfileSampleType, TableProfilerConfig, ) from metadata.generated.schema.entity.services.databaseService import ( DatabaseServiceType, ) from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue +from metadata.generated.schema.type.basic import ProfileSampleType +from metadata.generated.schema.type.samplingConfig import ProfileSampleConfig @pytest.mark.parametrize( @@ -49,8 +50,13 @@ def test_compile_and_clauses(elements, expected): **{ "database_service_type": "BigQuery", "table_profile_config": TableProfilerConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=10, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 10, + "profileSampleType": "PERCENTAGE", + }, + ), ), "table1": TableParameter.model_construct( **{ @@ -82,8 +88,13 @@ def test_compile_and_clauses(elements, expected): **{ "database_service_type": "BigQuery", "table_profile_config": TableProfilerConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=20, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 20, + "profileSampleType": "PERCENTAGE", + }, + ), ), "table1": TableParameter.model_construct( **{ @@ -115,8 +126,13 @@ def test_compile_and_clauses(elements, expected): **{ "database_service_type": "BigQuery", "table_profile_config": TableProfilerConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=10, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 10, + "profileSampleType": "PERCENTAGE", + }, + ), ), "table1": TableParameter.model_construct( **{ @@ -148,8 +164,13 @@ def test_compile_and_clauses(elements, expected): **{ "database_service_type": "BigQuery", "table_profile_config": TableProfilerConfig( - 
profileSampleType=ProfileSampleType.ROWS, - profileSample=20, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 20, + "profileSampleType": "ROWS", + }, + ), ), "table1": TableParameter.model_construct( **{ @@ -180,8 +201,13 @@ def test_compile_and_clauses(elements, expected): TableDiffRuntimeParameters.model_construct( **{ "table_profile_config": TableProfilerConfig( - profileSampleType=ProfileSampleType.ROWS, - profileSample=20, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 20, + "profileSampleType": "ROWS", + }, + ), ), "table1": TableParameter.model_construct( **{ @@ -253,10 +279,12 @@ def test_sample_where_clauses(config, expected): None, ) validator.runtime_params = config - if ( - config.table_profile_config - and config.table_profile_config.profileSampleType == ProfileSampleType.ROWS - ): + table_profile_config = config.table_profile_config if config else None + profile_sample_config = ( + table_profile_config.profileSampleConfig.root if table_profile_config else None + ) + sample_config = profile_sample_config.config if profile_sample_config else None + if sample_config and sample_config.profileSampleType == ProfileSampleType.ROWS: validator.get_total_row_count = Mock(return_value=10_000) with patch("random.choices", Mock(return_value=["a"])): assert validator.sample_where_clause() == expected diff --git a/ingestion/tests/unit/observability/profiler/pandas/test_sample.py b/ingestion/tests/unit/observability/profiler/pandas/test_sample.py index 54ad018cda6..70f3cd68e14 100644 --- a/ingestion/tests/unit/observability/profiler/pandas/test_sample.py +++ b/ingestion/tests/unit/observability/profiler/pandas/test_sample.py @@ -34,7 +34,12 @@ from metadata.profiler.interface.pandas.profiler_interface import ( from metadata.profiler.metrics.registry import Metrics from metadata.profiler.processor.core import Profiler from metadata.readers.dataframe.models 
import DatalakeColumnWrapper -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.pandas.sampler import DatalakeSampler @@ -175,7 +180,12 @@ class DatalakeSampleTest(TestCase): service_connection_config=DatalakeConnection(configSource={}), ometa_client=None, entity=cls.table_entity, - sample_config=SampleConfig(profileSample=50.0), + sample_config=SampleConfig( + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig(profileSample=50.0), + ) + ), ) cls.datalake_profiler_interface = PandasProfilerInterface( service_connection_config=DatalakeConnection(configSource={}), @@ -211,7 +221,12 @@ class DatalakeSampleTest(TestCase): service_connection_config=DatalakeConnection(configSource={}), ometa_client=None, entity=self.table_entity, - sample_config=SampleConfig(profileSample=50.0), + sample_config=SampleConfig( + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig(profileSample=50.0), + ) + ), ) random_sample = sampler.get_dataset() res = sum(len(r) for r in random_sample()) @@ -245,7 +260,12 @@ class DatalakeSampleTest(TestCase): service_connection_config=DatalakeConnection(configSource={}), ometa_client=None, entity=self.table_entity, - sample_config=SampleConfig(profileSample=50.0), + sample_config=SampleConfig( + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig(profileSample=50.0), + ) + ), ) datalake_profiler_interface = PandasProfilerInterface( service_connection_config=DatalakeConnection(configSource={}), @@ -326,7 +346,12 @@ class DatalakeSampleTest(TestCase): service_connection_config=DatalakeConnection(configSource={}), ometa_client=None, entity=self.table_entity, - 
sample_config=SampleConfig(profileSample=50.0), + sample_config=SampleConfig( + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig(profileSample=50.0), + ) + ), ) sample_data = sampler.fetch_sample_data() @@ -358,7 +383,12 @@ class DatalakeSampleTest(TestCase): service_connection_config=DatalakeConnection(configSource={}), ometa_client=None, entity=self.table_entity, - default_sample_config=SampleConfig(profileSample=50.0), + default_sample_config=SampleConfig( + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig(profileSample=50.0), + ) + ), sample_query="`age` > 30", ) sample_data = sampler.fetch_sample_data() diff --git a/ingestion/tests/unit/observability/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py b/ingestion/tests/unit/observability/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py index 9a29a275e77..b6b267fb5db 100644 --- a/ingestion/tests/unit/observability/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py +++ b/ingestion/tests/unit/observability/profiler/sqlalchemy/azuresql/test_azuresql_sampling.py @@ -21,16 +21,21 @@ from metadata.generated.schema.entity.data.table import ( DataType, PartitionIntervalTypes, PartitionProfilerConfig, - ProfileSampleType, Table, ) from metadata.generated.schema.entity.services.connections.database.azureSQLConnection import ( AzureSQLConnection, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.profiler.interface.sqlalchemy.profiler_interface import ( SQAProfilerInterface, ) -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.sqlalchemy.azuresql.sampler import AzureSQLSampler from metadata.sampler.sqlalchemy.sampler import SQASampler @@ -93,7 +98,13 @@ class SampleTest(TestCase): 
ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -116,7 +127,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.ROWS, profileSample=50 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50, + profileSampleType=ProfileSampleType.ROWS, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -139,8 +156,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=50.0, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), partition_details=PartitionProfilerConfig( enablePartitioning=True, diff --git a/ingestion/tests/unit/observability/profiler/sqlalchemy/bigquery/test_bigquery_sampling.py b/ingestion/tests/unit/observability/profiler/sqlalchemy/bigquery/test_bigquery_sampling.py index 813971aa8e0..6e53c802246 100644 --- a/ingestion/tests/unit/observability/profiler/sqlalchemy/bigquery/test_bigquery_sampling.py +++ b/ingestion/tests/unit/observability/profiler/sqlalchemy/bigquery/test_bigquery_sampling.py @@ -12,7 +12,6 @@ from metadata.generated.schema.entity.data.table import ( DataType, PartitionIntervalTypes, PartitionProfilerConfig, - ProfileSampleType, Table, ) from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import ( @@ -22,12 +21,18 @@ 
from metadata.generated.schema.security.credentials.gcpCredentials import GCPCre from metadata.generated.schema.security.credentials.gcpValues import ( GcpCredentialsValues, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.generated.schema.type.entityReference import EntityReference from metadata.profiler.interface.sqlalchemy.profiler_interface import ( SQAProfilerInterface, ) from metadata.profiler.orm.functions.table_metric_computer import TableType -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.sqlalchemy.bigquery.sampler import BigQuerySampler from metadata.sampler.sqlalchemy.sampler import SQASampler @@ -115,7 +120,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), table_type=TableType.Regular, ) @@ -150,7 +161,13 @@ class SampleTest(TestCase): ometa_client=None, entity=view_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -186,7 +203,13 @@ class SampleTest(TestCase): ometa_client=None, entity=view_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + 
profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), partition_details=PartitionProfilerConfig( enablePartitioning=True, @@ -216,7 +239,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), partition_details=PartitionProfilerConfig( enablePartitioning=True, diff --git a/ingestion/tests/unit/observability/profiler/sqlalchemy/mssql/test_mssql_sampling.py b/ingestion/tests/unit/observability/profiler/sqlalchemy/mssql/test_mssql_sampling.py index a657c8414ef..47f8dd85c44 100644 --- a/ingestion/tests/unit/observability/profiler/sqlalchemy/mssql/test_mssql_sampling.py +++ b/ingestion/tests/unit/observability/profiler/sqlalchemy/mssql/test_mssql_sampling.py @@ -12,16 +12,21 @@ from metadata.generated.schema.entity.data.table import ( DataType, PartitionIntervalTypes, PartitionProfilerConfig, - ProfileSampleType, Table, ) from metadata.generated.schema.entity.services.connections.database.mssqlConnection import ( MssqlConnection, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.profiler.interface.sqlalchemy.profiler_interface import ( SQAProfilerInterface, ) -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.sqlalchemy.mssql.sampler import MssqlSampler from metadata.sampler.sqlalchemy.sampler import SQASampler @@ -84,7 +89,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0 + 
profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -107,7 +118,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.ROWS, profileSample=50 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50, + profileSampleType=ProfileSampleType.ROWS, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -130,8 +147,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=50.0, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), partition_details=PartitionProfilerConfig( enablePartitioning=True, diff --git a/ingestion/tests/unit/observability/profiler/sqlalchemy/postgres/test_postgres_sampling.py b/ingestion/tests/unit/observability/profiler/sqlalchemy/postgres/test_postgres_sampling.py index 66b04c7f056..e2a0bcb01d1 100644 --- a/ingestion/tests/unit/observability/profiler/sqlalchemy/postgres/test_postgres_sampling.py +++ b/ingestion/tests/unit/observability/profiler/sqlalchemy/postgres/test_postgres_sampling.py @@ -12,17 +12,21 @@ from metadata.generated.schema.entity.data.table import ( DataType, PartitionIntervalTypes, PartitionProfilerConfig, - ProfileSampleType, - SamplingMethodType, Table, ) from metadata.generated.schema.entity.services.connections.database.postgresConnection import ( PostgresConnection, ) +from metadata.generated.schema.type.basic import ProfileSampleType, SamplingMethodType from 
metadata.profiler.interface.sqlalchemy.profiler_interface import ( SQAProfilerInterface, ) -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.sqlalchemy.postgres.sampler import PostgresSampler from metadata.sampler.sqlalchemy.sampler import SQASampler @@ -83,8 +87,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=50.0, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -109,9 +118,14 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=50.0, - samplingMethodType=sampling_method_type, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + samplingMethodType=sampling_method_type, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -130,7 +144,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), partition_details=PartitionProfilerConfig( enablePartitioning=True, diff --git a/ingestion/tests/unit/observability/profiler/sqlalchemy/snowflake/test_snowflake_sampling.py 
b/ingestion/tests/unit/observability/profiler/sqlalchemy/snowflake/test_snowflake_sampling.py index 5a270eabe27..e65dfd51f05 100644 --- a/ingestion/tests/unit/observability/profiler/sqlalchemy/snowflake/test_snowflake_sampling.py +++ b/ingestion/tests/unit/observability/profiler/sqlalchemy/snowflake/test_snowflake_sampling.py @@ -12,17 +12,21 @@ from metadata.generated.schema.entity.data.table import ( DataType, PartitionIntervalTypes, PartitionProfilerConfig, - ProfileSampleType, - SamplingMethodType, Table, ) from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import ( SnowflakeConnection, ) +from metadata.generated.schema.type.basic import ProfileSampleType, SamplingMethodType from metadata.profiler.interface.sqlalchemy.profiler_interface import ( SQAProfilerInterface, ) -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.sqlalchemy.sampler import SQASampler from metadata.sampler.sqlalchemy.snowflake.sampler import SnowflakeSampler @@ -82,7 +86,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, profileSample=50.0 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -109,9 +119,14 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=50.0, - samplingMethodType=sampling_method_type, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + 
profileSampleType=ProfileSampleType.PERCENTAGE, + samplingMethodType=sampling_method_type, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -134,7 +149,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.ROWS, profileSample=50 + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50, + profileSampleType=ProfileSampleType.ROWS, + ), + ) ), ) query: CTE = sampler.get_sample_query() @@ -157,8 +178,13 @@ class SampleTest(TestCase): ometa_client=None, entity=self.table_entity, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=50.0, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=50.0, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ) ), partition_details=PartitionProfilerConfig( enablePartitioning=True, diff --git a/ingestion/tests/unit/observability/profiler/sqlalchemy/test_runner.py b/ingestion/tests/unit/observability/profiler/sqlalchemy/test_runner.py index a75c139e161..06a81809c58 100644 --- a/ingestion/tests/unit/observability/profiler/sqlalchemy/test_runner.py +++ b/ingestion/tests/unit/observability/profiler/sqlalchemy/test_runner.py @@ -24,7 +24,12 @@ from sqlalchemy.orm import DeclarativeBase from metadata.ingestion.connections.session import create_and_bind_session from metadata.profiler.processor.runner import QueryRunner -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.sqlalchemy.sampler import SQASampler from metadata.utils.timeout import cls_timeout @@ -92,7 +97,12 @@ class RunnerTest(TestCase): service_connection_config=Mock(), ometa_client=None, entity=None, - 
sample_config=SampleConfig(profileSample=50.0), + sample_config=SampleConfig( + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig(profileSample=50.0), + ) + ), ) cls.dataset = sampler.get_dataset() diff --git a/ingestion/tests/unit/observability/profiler/sqlalchemy/test_sample.py b/ingestion/tests/unit/observability/profiler/sqlalchemy/test_sample.py index 242709eaa32..6741481b531 100644 --- a/ingestion/tests/unit/observability/profiler/sqlalchemy/test_sample.py +++ b/ingestion/tests/unit/observability/profiler/sqlalchemy/test_sample.py @@ -21,23 +21,24 @@ from sqlalchemy import TEXT, Column, Integer, String, func from sqlalchemy.orm import DeclarativeBase from metadata.generated.schema.entity.data.table import Column as EntityColumn -from metadata.generated.schema.entity.data.table import ( - ColumnName, - DataType, - ProfileSampleType, - Table, -) +from metadata.generated.schema.entity.data.table import ColumnName, DataType, Table from metadata.generated.schema.entity.services.connections.database.sqliteConnection import ( SQLiteConnection, SQLiteScheme, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.profiler.interface.sqlalchemy.profiler_interface import ( SQAProfilerInterface, ) from metadata.profiler.metrics.registry import Metrics from metadata.profiler.orm.registry import CustomTypes from metadata.profiler.processor.core import Profiler -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.sqlalchemy.sampler import SQASampler @@ -111,7 +112,12 @@ class SampleTest(TestCase): service_connection_config=cls.sqlite_conn, ometa_client=None, entity=None, - sample_config=SampleConfig(profileSample=50.0), + sample_config=SampleConfig( + profileSampleConfig=ProfileSampleConfig( + 
sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig(profileSample=50.0), + ) + ), ) cls.dataset = cls.sampler.get_dataset() cls.sqa_profiler_interface = SQAProfilerInterface( @@ -357,7 +363,12 @@ class SampleTest(TestCase): service_connection_config=self.sqlite_conn, ometa_client=None, entity=None, - sample_config=SampleConfig(profileSample=50.0), + sample_config=SampleConfig( + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig(profileSample=50.0), + ) + ), sample_query=stmt, ) sample_data = sampler.fetch_sample_data() @@ -375,8 +386,13 @@ class SampleTest(TestCase): ometa_client=None, entity=None, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=100, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=100, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ), randomizedSample=True, ), sample_data_count=5, @@ -397,8 +413,13 @@ class SampleTest(TestCase): ometa_client=None, entity=None, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=100, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=100, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ), randomizedSample=False, ), sample_data_count=5, @@ -419,8 +440,13 @@ class SampleTest(TestCase): ometa_client=None, entity=None, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=100, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=100, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ), randomizedSample=None, ), sample_data_count=5, @@ -441,8 +467,13 @@ class SampleTest(TestCase): ometa_client=None, 
entity=None, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=100, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=100, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ), randomizedSample=True, ), sample_data_count=5, @@ -462,8 +493,13 @@ class SampleTest(TestCase): ometa_client=None, entity=None, sample_config=SampleConfig( - profileSampleType=ProfileSampleType.PERCENTAGE, - profileSample=100, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=100, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ), randomizedSample=False, ), sample_data_count=5, diff --git a/ingestion/tests/unit/observability/profiler/test_profiler_interface.py b/ingestion/tests/unit/observability/profiler/test_profiler_interface.py index c4b94ddc7a8..cb618bfd7ed 100644 --- a/ingestion/tests/unit/observability/profiler/test_profiler_interface.py +++ b/ingestion/tests/unit/observability/profiler/test_profiler_interface.py @@ -21,20 +21,15 @@ from metadata.generated.schema.entity.data.databaseSchema import ( DatabaseSchema, DatabaseSchemaProfilerConfig, ) -from metadata.generated.schema.entity.data.table import ( - ProfileSampleType, - Table, - TableProfilerConfig, -) +from metadata.generated.schema.entity.data.table import Table, TableProfilerConfig from metadata.generated.schema.entity.services.connections.connectionBasicType import ( DataStorageConfig, SampleDataStorageConfig, ) -from metadata.generated.schema.metadataIngestion.databaseServiceProfilerPipeline import ( - DatabaseServiceProfilerPipeline, -) from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.generated.schema.type.entityReference import EntityReference +from 
metadata.generated.schema.type.samplingConfig import ProfileSampleConfig from metadata.profiler.api.models import DatabaseAndSchemaConfig, TableConfig from metadata.profiler.config import ( get_database_profiler_config, @@ -63,8 +58,13 @@ class ProfilerInterfaceTest(TestCase): columns=[], tableProfilerConfig=TableProfilerConfig( sampleDataCount=101, - profileSample=11, - profileSampleType=ProfileSampleType.PERCENTAGE, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={ + "profileSample": 11, + "profileSampleType": "PERCENTAGE", + }, + ), ), service=EntityReference( id="ba451e8a-5069-4a45-ac38-95421bbdcb5a", @@ -88,7 +88,10 @@ class ProfilerInterfaceTest(TestCase): cls.schema_profiler_config = DatabaseSchemaProfilerConfig( sampleDataCount=102, - profileSample=12, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={"profileSample": 12, "profileSampleType": "PERCENTAGE"}, + ), sampleDataStorageConfig=cls.schema_storage_config, ) @@ -118,7 +121,10 @@ class ProfilerInterfaceTest(TestCase): cls.database_profiler_config = DatabaseProfilerConfig( sampleDataCount=202, - profileSample=22, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType="STATIC", + config={"profileSample": 22, "profileSampleType": "PERCENTAGE"}, + ), sampleDataStorageConfig=cls.database_storage_config, ) @@ -156,66 +162,50 @@ class ProfilerInterfaceTest(TestCase): ) def test_get_profile_sample_configs(self): - source_config = DatabaseServiceProfilerPipeline() - - expected = SampleConfig( - profileSample=11, - profileSampleType=ProfileSampleType.PERCENTAGE, - ) + # Pipeline has no profileSampleConfig set — resolution should fall through + # to table config which has profileSample=11 actual = get_profile_sample_config( entity=self.table, schema_entity=self.schema_entity, database_entity=self.database_entity, entity_config=None, - default_sample_config=SampleConfig( - profileSample=source_config.profileSample, - 
profileSampleType=source_config.profileSampleType, - samplingMethodType=source_config.samplingMethodType, - ), + default_sample_config=SampleConfig(), ) - self.assertEqual(expected, actual) + static = actual.get_static_config() + self.assertIsNotNone(static) + self.assertEqual(static.profileSample, 11) + self.assertEqual(static.profileSampleType, ProfileSampleType.PERCENTAGE) profiler = TableConfig( profileSample=11, profileSampleType=ProfileSampleType.PERCENTAGE, fullyQualifiedName="demo", ) - expected = SampleConfig( - profileSample=11, - profileSampleType=ProfileSampleType.PERCENTAGE, - ) actual = get_profile_sample_config( entity=self.table, schema_entity=self.schema_entity, database_entity=self.database_entity, entity_config=profiler, - default_sample_config=SampleConfig( - profileSample=source_config.profileSample, - profileSampleType=source_config.profileSampleType, - samplingMethodType=source_config.samplingMethodType, - ), + default_sample_config=SampleConfig(), ) - self.assertEqual(expected, actual) + static = actual.get_static_config() + self.assertIsNotNone(static) + self.assertEqual(static.profileSample, 11) + self.assertEqual(static.profileSampleType, ProfileSampleType.PERCENTAGE) - profiler = None - expected = SampleConfig( - profileSample=22, - profileSampleType=ProfileSampleType.PERCENTAGE, - ) table_copy = deepcopy(self.table) table_copy.tableProfilerConfig = None actual = get_profile_sample_config( entity=table_copy, schema_entity=None, database_entity=self.database_entity, - entity_config=profiler, - default_sample_config=SampleConfig( - profileSample=source_config.profileSample, - profileSampleType=source_config.profileSampleType, - samplingMethodType=source_config.samplingMethodType, - ), + entity_config=None, + default_sample_config=SampleConfig(), ) - self.assertEqual(expected, actual) + static = actual.get_static_config() + self.assertIsNotNone(static) + self.assertEqual(static.profileSample, 22) + self.assertEqual(static.profileSampleType, 
ProfileSampleType.PERCENTAGE) def test_get_sample_data_count_config(self): entity_config = TableConfig( diff --git a/ingestion/tests/unit/sampler/test_sampler_100_pct.py b/ingestion/tests/unit/sampler/test_sampler_100_pct.py index 65ab677ddc2..7d2c8411463 100644 --- a/ingestion/tests/unit/sampler/test_sampler_100_pct.py +++ b/ingestion/tests/unit/sampler/test_sampler_100_pct.py @@ -17,8 +17,13 @@ randomization; None and False both skip randomization. """ from unittest.mock import MagicMock, patch -from metadata.generated.schema.entity.data.table import ProfileSampleType -from metadata.sampler.models import SampleConfig +from metadata.generated.schema.type.basic import ProfileSampleType +from metadata.sampler.models import ( + ProfileSampleConfig, + ProfileSampleConfigType, + SampleConfig, + StaticSamplingConfig, +) class TestSQASampler100Pct: @@ -34,8 +39,13 @@ class TestSQASampler100Pct: sampler = SQASampler() sampler.sample_config = SampleConfig( - profileSample=100, - profileSampleType=ProfileSampleType.PERCENTAGE, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=100, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ), randomizedSample=randomized_sample, ) sampler.sample_query = None @@ -81,8 +91,13 @@ class TestDatalakeSampler100Pct: sampler = DatalakeSampler() sampler.sample_config = SampleConfig( - profileSample=100, - profileSampleType=ProfileSampleType.PERCENTAGE, + profileSampleConfig=ProfileSampleConfig( + sampleConfigType=ProfileSampleConfigType.STATIC, + config=StaticSamplingConfig( + profileSample=100, + profileSampleType=ProfileSampleType.PERCENTAGE, + ), + ), randomizedSample=randomized_sample, ) sampler.sample_query = None diff --git a/ingestion/tests/unit/topology/database/test_burstiq_sampler.py b/ingestion/tests/unit/topology/database/test_burstiq_sampler.py index c33229eb679..da74657bab6 100644 --- 
a/ingestion/tests/unit/topology/database/test_burstiq_sampler.py +++ b/ingestion/tests/unit/topology/database/test_burstiq_sampler.py @@ -25,15 +25,19 @@ from metadata.generated.schema.entity.data.table import Column as EntityColumn from metadata.generated.schema.entity.data.table import ( ColumnName, DataType, - ProfileSampleType, Table, TableData, ) from metadata.generated.schema.entity.services.connections.database.burstIQConnection import ( BurstIQConnection, ) +from metadata.generated.schema.type.basic import ProfileSampleType from metadata.generated.schema.type.entityReference import EntityReference -from metadata.sampler.models import SampleConfig +from metadata.sampler.models import ( + ProfileSampleConfig, + SampleConfig, + StaticSamplingConfig, +) from metadata.sampler.pandas.burstiq.sampler import _PAGE_SIZE, BurstIQSampler from metadata.utils.constants import SAMPLE_DATA_MAX_CELL_LENGTH from metadata.utils.sqa_like_column import SQALikeColumn @@ -98,8 +102,12 @@ class TestBurstIQSamplerGetClient: class TestBurstIQSamplerRawDataset: def test_rows_sample_type_limits_to_exact_count(self, sampler, mock_client): sampler.sample_config = SampleConfig( - profileSample=3, - profileSampleType=ProfileSampleType.ROWS, + profileSampleConfig=ProfileSampleConfig( + config=StaticSamplingConfig( + profileSample=3, + profileSampleType=ProfileSampleType.ROWS, + ) + ) ) mock_client.get_records_by_tql.return_value = [ {"score": 1.0, "age": i} for i in range(3) @@ -115,8 +123,12 @@ class TestBurstIQSamplerRawDataset: def test_percentage_sample_type_queries_chain_metrics(self, sampler, mock_client): sampler.sample_config = SampleConfig( - profileSample=50, - profileSampleType=ProfileSampleType.PERCENTAGE, + profileSampleConfig=ProfileSampleConfig( + config=StaticSamplingConfig( + profileSample=50, + profileSampleType=ProfileSampleType.PERCENTAGE, + ) + ) ) mock_client.get_chain_metrics.return_value = {"TestChain": 100} mock_client.get_records_by_tql.return_value = [ diff 
--git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/DatabaseSchemaResourceIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/DatabaseSchemaResourceIT.java index b4c0470279a..614f0b1dbb2 100644 --- a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/DatabaseSchemaResourceIT.java +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/DatabaseSchemaResourceIT.java @@ -24,8 +24,11 @@ import org.openmetadata.schema.entity.data.DatabaseSchema; import org.openmetadata.schema.entity.services.DatabaseService; import org.openmetadata.schema.type.ApiStatus; import org.openmetadata.schema.type.EntityHistory; +import org.openmetadata.schema.type.ProfileSampleConfig; +import org.openmetadata.schema.type.StaticSamplingConfig; import org.openmetadata.schema.type.api.BulkOperationResult; import org.openmetadata.schema.type.csv.CsvImportResult; +import org.openmetadata.schema.utils.JsonUtils; import org.openmetadata.sdk.client.OpenMetadataClient; import org.openmetadata.sdk.fluent.DatabaseSchemas; import org.openmetadata.sdk.fluent.Databases; @@ -435,9 +438,15 @@ public class DatabaseSchemaResourceIT extends BaseEntityIT { // Create profiler config TableProfilerConfig config = new TableProfilerConfig() - .withProfileSample(50.0) - .withProfileSampleType(TableProfilerConfig.ProfileSampleType.PERCENTAGE); + .withProfileSampleConfig( + new ProfileSampleConfig() + .withSampleConfigType(ProfileSampleConfig.SampleConfigType.STATIC) + .withConfig( + new StaticSamplingConfig() + .withProfileSample(50.0) + .withProfileSampleType( + org.openmetadata.schema.type.TableProfile.ProfileSampleType + .PERCENTAGE))); // Update profiler config Table updated = client.tables().updateProfilerConfig(table.getId(), config); assertNotNull(updated.getTableProfilerConfig()); - assertEquals(50.0, updated.getTableProfilerConfig().getProfileSample()); + 
assertNotNull(updated.getTableProfilerConfig().getProfileSampleConfig()); + StaticSamplingConfig staticConfig = + JsonUtils.convertValue( + updated.getTableProfilerConfig().getProfileSampleConfig().getConfig(), + StaticSamplingConfig.class); + assertEquals(50.0, staticConfig.getProfileSample()); } // =================================================================== diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DatabaseRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DatabaseRepository.java index f159643f8f3..36c1d5ba1cf 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DatabaseRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DatabaseRepository.java @@ -13,6 +13,7 @@ package org.openmetadata.service.jdbi3; +import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty; import static org.openmetadata.csv.CsvUtil.addDomains; import static org.openmetadata.csv.CsvUtil.addExtension; import static org.openmetadata.csv.CsvUtil.addField; @@ -54,7 +55,9 @@ import org.openmetadata.schema.type.AssetCertification; import org.openmetadata.schema.type.DatabaseProfilerConfig; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.Include; +import org.openmetadata.schema.type.ProfileSampleConfig; import org.openmetadata.schema.type.Relationship; +import org.openmetadata.schema.type.StaticSamplingConfig; import org.openmetadata.schema.type.TagLabel; import org.openmetadata.schema.type.change.ChangeSource; import org.openmetadata.schema.type.csv.CsvDocumentation; @@ -345,11 +348,20 @@ public class DatabaseRepository extends EntityRepository { UUID databaseId, DatabaseProfilerConfig databaseProfilerConfig) { // Validate the request content Database database = find(databaseId, Include.NON_DELETED); - if (databaseProfilerConfig.getProfileSampleType() != null - && databaseProfilerConfig.getProfileSample() != 
null) { - EntityUtil.validateProfileSample( - databaseProfilerConfig.getProfileSampleType().toString(), - databaseProfilerConfig.getProfileSample()); + ProfileSampleConfig profileSampleConfig = databaseProfilerConfig.getProfileSampleConfig(); + if (!nullOrEmpty(profileSampleConfig) && !nullOrEmpty(profileSampleConfig.getConfig())) { + ProfileSampleConfig.SampleConfigType sampleConfigType = + profileSampleConfig.getSampleConfigType(); + if (!nullOrEmpty(sampleConfigType) + && sampleConfigType.equals(ProfileSampleConfig.SampleConfigType.STATIC)) { + StaticSamplingConfig staticConfig = + JsonUtils.convertValue(profileSampleConfig.getConfig(), StaticSamplingConfig.class); + if (staticConfig.getProfileSampleType() != null + && staticConfig.getProfileSample() != null) { + EntityUtil.validateProfileSample( + staticConfig.getProfileSampleType().toString(), staticConfig.getProfileSample()); + } + } } daoCollection diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DatabaseSchemaRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DatabaseSchemaRepository.java index 205c84e5409..4eb5735879e 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DatabaseSchemaRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DatabaseSchemaRepository.java @@ -55,7 +55,9 @@ import org.openmetadata.schema.type.AssetCertification; import org.openmetadata.schema.type.DatabaseSchemaProfilerConfig; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.Include; +import org.openmetadata.schema.type.ProfileSampleConfig; import org.openmetadata.schema.type.Relationship; +import org.openmetadata.schema.type.StaticSamplingConfig; import org.openmetadata.schema.type.TagLabel; import org.openmetadata.schema.type.change.ChangeSource; import org.openmetadata.schema.type.csv.CsvDocumentation; @@ -726,11 +728,20 @@ public class DatabaseSchemaRepository 
extends EntityRepository { // Validate the request content DatabaseSchema databaseSchema = find(databaseSchemaId, Include.NON_DELETED); - if (databaseSchemaProfilerConfig.getProfileSampleType() != null - && databaseSchemaProfilerConfig.getProfileSample() != null) { - EntityUtil.validateProfileSample( - databaseSchemaProfilerConfig.getProfileSampleType().toString(), - databaseSchemaProfilerConfig.getProfileSample()); + ProfileSampleConfig profileSampleConfig = databaseSchemaProfilerConfig.getProfileSampleConfig(); + if (!nullOrEmpty(profileSampleConfig) && !nullOrEmpty(profileSampleConfig.getConfig())) { + ProfileSampleConfig.SampleConfigType sampleConfigType = + profileSampleConfig.getSampleConfigType(); + if (!nullOrEmpty(sampleConfigType) + && sampleConfigType.equals(ProfileSampleConfig.SampleConfigType.STATIC)) { + StaticSamplingConfig staticConfig = + JsonUtils.convertValue(profileSampleConfig.getConfig(), StaticSamplingConfig.class); + if (staticConfig.getProfileSampleType() != null + && staticConfig.getProfileSample() != null) { + EntityUtil.validateProfileSample( + staticConfig.getProfileSampleType().toString(), staticConfig.getProfileSample()); + } + } } daoCollection diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/IngestionPipelineRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/IngestionPipelineRepository.java index 9e7f2aa3acd..6d8603b2904 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/IngestionPipelineRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/IngestionPipelineRepository.java @@ -925,13 +925,21 @@ public class IngestionPipelineRepository extends EntityRepository { validateColumn(table, columnProfilerConfig.getColumnName()); } } - if (tableProfilerConfig.getProfileSampleType() != null - && tableProfilerConfig.getProfileSample() != null) { - EntityUtil.validateProfileSample( - 
tableProfilerConfig.getProfileSampleType().toString(), - tableProfilerConfig.getProfileSample()); + ProfileSampleConfig profileSampleConfig = tableProfilerConfig.getProfileSampleConfig(); + if (!nullOrEmpty(profileSampleConfig) && !nullOrEmpty(profileSampleConfig.getConfig())) { + ProfileSampleConfig.SampleConfigType sampleConfigType = + profileSampleConfig.getSampleConfigType(); + if (!nullOrEmpty(sampleConfigType) + && sampleConfigType.equals(ProfileSampleConfig.SampleConfigType.STATIC)) { + StaticSamplingConfig staticConfig = + JsonUtils.convertValue(profileSampleConfig.getConfig(), StaticSamplingConfig.class); + if (staticConfig.getProfileSampleType() != null + && staticConfig.getProfileSample() != null) { + EntityUtil.validateProfileSample( + staticConfig.getProfileSampleType().toString(), staticConfig.getProfileSample()); + } + } } } diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/data/database.json b/openmetadata-spec/src/main/resources/json/schema/entity/data/database.json index 755e313c3b1..960d7d249b0 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/data/database.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/data/database.json @@ -151,23 +151,12 @@ "javaType": "org.openmetadata.schema.type.DatabaseProfilerConfig", "description": "This schema defines the type for Database profile config.", "properties": { - "profileSample": { - "description": "Percentage of data or no. 
of rows we want to execute the profiler and tests on", - "type": "number", - "default": null - }, - "profileSampleType": { - "$ref": "./table.json#/definitions/profileSampleType" - }, "sampleDataCount": { "description": "Number of row of sample data to be generated", "type": "integer", "default": 50, "title": "Sample Data Rows Count" }, - "samplingMethodType": { - "$ref": "./table.json#/definitions/samplingMethodType" - }, "sampleDataStorageConfig": { "title": "Storage Config for Sample Data", "$ref": "../services/connections/connectionBasicType.json#/definitions/sampleDataStorageConfig" @@ -175,7 +164,10 @@ "randomizedSample": { "description": "Whether to randomize the sample data or not.", "type": "boolean", - "default": false + "default": true + }, + "profileSampleConfig": { + "$ref": "../../type/samplingConfig.json#/definitions/profileSampleConfig" } } }, diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/data/databaseSchema.json b/openmetadata-spec/src/main/resources/json/schema/entity/data/databaseSchema.json index a9fd70671ce..f9bb9e6f1ba 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/data/databaseSchema.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/data/databaseSchema.json @@ -147,23 +147,12 @@ "javaType": "org.openmetadata.schema.type.DatabaseSchemaProfilerConfig", "description": "This schema defines the type for Schema profile config.", "properties": { - "profileSample": { - "description": "Percentage of data or no. 
of rows we want to execute the profiler and tests on", - "type": "number", - "default": null - }, - "profileSampleType": { - "$ref": "./table.json#/definitions/profileSampleType" - }, "sampleDataCount": { "description": "Number of row of sample data to be generated", "type": "integer", "default": 50, "title": "Sample Data Rows Count" }, - "samplingMethodType": { - "$ref": "./table.json#/definitions/samplingMethodType" - }, "sampleDataStorageConfig": { "title": "Storage Config for Sample Data", "$ref": "../services/connections/connectionBasicType.json#/definitions/sampleDataStorageConfig" @@ -171,7 +160,10 @@ "randomizedSample": { "description": "Whether to randomize the sample data or not.", "type": "boolean", - "default": false + "default": true + }, + "profileSampleConfig": { + "$ref": "../../type/samplingConfig.json#/definitions/profileSampleConfig" } } }, diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/data/table.json b/openmetadata-spec/src/main/resources/json/schema/entity/data/table.json index e4b32799045..c39ee7b56ba 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/data/table.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/data/table.json @@ -12,21 +12,13 @@ ], "definitions": { "profileSampleType": { - "description": "Type of Profile Sample (percentage or rows)", - "type": "string", - "enum": [ - "PERCENTAGE", - "ROWS" - ], - "default": "PERCENTAGE" + "$ref": "../../type/basic.json#/definitions/profileSampleType" }, "samplingMethodType": { - "description": "Type of Sampling Method (BERNOULLI or SYSTEM)", - "type": "string", - "enum": [ - "BERNOULLI", - "SYSTEM" - ] + "$ref": "../../type/basic.json#/definitions/samplingMethodType" + }, + "profileSampleConfig": { + "$ref": "../../type/samplingConfig.json#/definitions/profileSampleConfig" }, "tableType": { "javaType": "org.openmetadata.schema.type.TableType", @@ -839,17 +831,6 @@ "javaType": "org.openmetadata.schema.type.TableProfilerConfig", 
"description": "This schema defines the type for Table profile config.", "properties": { - "profileSampleType": { - "$ref": "#/definitions/profileSampleType" - }, - "profileSample": { - "description": "Percentage of data or no. of rows used to compute the profiler metrics and run data quality tests", - "type": "number", - "default": null - }, - "samplingMethodType": { - "$ref": "#/definitions/samplingMethodType" - }, "sampleDataCount": { "description": "Number of sample rows to ingest when 'Generate Sample Data' is enabled", "type": "integer", @@ -902,6 +883,9 @@ "description": "Table Specific configuration for Profiling it with a Spark Engine. It is ignored for other engines.", "$ref": "#/definitions/sparkTableProfilerConfig", "default": null + }, + "profileSampleConfig": { + "$ref": "#/definitions/profileSampleConfig" } } }, @@ -914,17 +898,6 @@ "description": "Timestamp on which profile is taken.", "$ref": "../../type/basic.json#/definitions/timestamp" }, - "profileSample": { - "description": "Percentage of data or no. of rows we want to execute the profiler and tests on", - "type": "number", - "default": null - }, - "profileSampleType": { - "$ref": "#/definitions/profileSampleType" - }, - "samplingMethodType": { - "$ref": "#/definitions/samplingMethodType" - }, "columnCount": { "description": "No.of columns in the table.", "type": "number" @@ -949,6 +922,14 @@ "$ref": "#/definitions/customMetricProfile" }, "default": null + }, + "profileSample": { + "description": "Percentage of data or no. 
of rows we want to execute the profiler and tests on", + "type": "number", + "default": null + }, + "profileSampleType": { + "$ref": "#/definitions/profileSampleType" } }, "required": [ diff --git a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json index df269e12e1d..f2f18ec39ff 100644 --- a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json +++ b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json @@ -86,19 +86,8 @@ "default": false, "title": "Use System Table Statistics" }, - "profileSampleType": { - "$ref": "../entity/data/table.json#/definitions/profileSampleType", - "title": "Profile Sample Type" - }, - "profileSample": { - "description": "Percentage of data or no. of rows used to compute the profiler metrics and run data quality tests", - "type": "number", - "default": null, - "title": "Profile Sample" - }, - "samplingMethodType": { - "$ref": "../entity/data/table.json#/definitions/samplingMethodType", - "title": "Sampling Method Type" + "profileSampleConfig": { + "$ref": "../type/samplingConfig.json#/definitions/profileSampleConfig" }, "randomizedSample": { "description": "Whether to randomize the sample data or not.", diff --git a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/testSuitePipeline.json b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/testSuitePipeline.json index f43ca42a1d3..2c901a4d89a 100644 --- a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/testSuitePipeline.json +++ b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/testSuitePipeline.json @@ -52,11 +52,11 @@ "title": "Profile Sample" }, "profileSampleType": { - "$ref": "../entity/data/table.json#/definitions/profileSampleType", + "$ref": 
"../type/basic.json#/definitions/profileSampleType", "title": "Profile Sample Type" }, "samplingMethodType": { - "$ref": "../entity/data/table.json#/definitions/samplingMethodType", + "$ref": "../type/basic.json#/definitions/samplingMethodType", "title": "Sampling Method Type" }, "testCases": { diff --git a/openmetadata-spec/src/main/resources/json/schema/type/basic.json b/openmetadata-spec/src/main/resources/json/schema/type/basic.json index 5eeafb88f46..37b6e0aa8f8 100644 --- a/openmetadata-spec/src/main/resources/json/schema/type/basic.json +++ b/openmetadata-spec/src/main/resources/json/schema/type/basic.json @@ -297,6 +297,17 @@ "enabled" ], "additionalProperties": false + }, + "profileSampleType": { + "description": "Type of Profile Sample (percentage or rows)", + "type": "string", + "enum": ["PERCENTAGE", "ROWS"], + "default": "PERCENTAGE" + }, + "samplingMethodType": { + "description": "Type of Sampling Method (BERNOULLI or SYSTEM)", + "type": "string", + "enum": ["BERNOULLI", "SYSTEM"] } } } diff --git a/openmetadata-spec/src/main/resources/json/schema/type/dynamicSamplingConfig.json b/openmetadata-spec/src/main/resources/json/schema/type/dynamicSamplingConfig.json new file mode 100644 index 00000000000..90c7dba8d3b --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/type/dynamicSamplingConfig.json @@ -0,0 +1,42 @@ +{ + "$id": "https://open-metadata.org/schema/type/dynamicSamplingConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "DynamicSamplingConfig", + "javaType": "org.openmetadata.schema.type.DynamicSamplingConfig", + "description": "Configuration for dynamic sampling based on table row count.", + "type": "object", + "properties": { + "thresholds": { + "description": "Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. 
Tables below the lowest threshold are profiled at 100% (no sampling).", + "type": "array", + "items": { + "type": "object", + "properties": { + "rowCountThreshold": { + "description": "Minimum row count for this tier to apply", + "type": "integer", + "minimum": 1, + "title": "Row Count Threshold" + }, + "profileSample": { + "description": "Sample percentage or row count to use for tables at or above this threshold", + "type": "number", + "default": null, + "title": "Profile Sample" + }, + "profileSampleType": { + "$ref": "./basic.json#/definitions/profileSampleType", + "title": "Profile Sample Type" + }, + "samplingMethodType": { + "$ref": "./basic.json#/definitions/samplingMethodType", + "title": "Sampling Method Type" + } + }, + "required": ["rowCountThreshold", "profileSample"], + "additionalProperties": false + } + } + }, + "additionalProperties": false +} diff --git a/openmetadata-spec/src/main/resources/json/schema/type/samplingConfig.json b/openmetadata-spec/src/main/resources/json/schema/type/samplingConfig.json new file mode 100644 index 00000000000..5bc87d8d5a4 --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/type/samplingConfig.json @@ -0,0 +1,34 @@ +{ + "$id": "https://open-metadata.org/schema/type/samplingConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SamplingConfig", + "description": "Sampling configuration types for the profiler.", + "definitions": { + "profileSampleConfig": { + "title": "Profile Sample Config", + "javaType": "org.openmetadata.schema.type.ProfileSampleConfig", + "description": "Profile sample configuration supporting static and dynamic sampling strategies.", + "type": "object", + "properties": { + "sampleConfigType": { + "title": "Sample Config Type", + "description": "Type of sampling to apply. STATIC: fixed sample size. 
DYNAMIC: sample size determined at runtime based on row count thresholds.", + "type": "string", + "enum": ["STATIC", "DYNAMIC"], + "default": "STATIC" + }, + "config": { + "oneOf": [ + { + "$ref": "./dynamicSamplingConfig.json" + }, + { + "$ref": "./staticSamplingConfig.json" + } + ] + } + }, + "additionalProperties": false + } + } +} diff --git a/openmetadata-spec/src/main/resources/json/schema/type/staticSamplingConfig.json b/openmetadata-spec/src/main/resources/json/schema/type/staticSamplingConfig.json new file mode 100644 index 00000000000..7816104b117 --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/type/staticSamplingConfig.json @@ -0,0 +1,25 @@ +{ + "$id": "https://open-metadata.org/schema/type/staticSamplingConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "StaticSamplingConfig", + "javaType": "org.openmetadata.schema.type.StaticSamplingConfig", + "description": "Configuration for static sampling, where a fixed sample size (a percentage or a number of rows) is applied regardless of table row count.", + "type": "object", + "properties": { + "profileSample": { + "description": "Percentage of data or no. 
of rows used to compute the profiler metrics and run data quality tests", + "type": "number", + "default": null, + "title": "Profile Sample" + }, + "profileSampleType": { + "$ref": "./basic.json#/definitions/profileSampleType", + "title": "Profile Sample Type" + }, + "samplingMethodType": { + "$ref": "./basic.json#/definitions/samplingMethodType", + "title": "Sampling Method Type" + } + }, + "additionalProperties": false +} diff --git a/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/DataQuality/Profiler.spec.ts b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/DataQuality/Profiler.spec.ts index 683a7c323d6..ee496a1b9e5 100644 --- a/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/DataQuality/Profiler.spec.ts +++ b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/DataQuality/Profiler.spec.ts @@ -331,8 +331,13 @@ test.describe( JSON.stringify({ excludeColumns: [table.entity?.columns[0].name], profileQuery: 'select * from table', - profileSample: 60, - profileSampleType: 'PERCENTAGE', + profileSampleConfig: { + sampleConfigType: 'STATIC', + config: { + profileSample: 60, + profileSampleType: 'PERCENTAGE', + }, + }, includeColumns: [{ columnName: table.entity?.columns[1].name }], partitioning: { partitionColumnName: table.entity?.columns[2].name, @@ -371,8 +376,6 @@ test.describe( JSON.stringify({ excludeColumns: [table.entity?.columns[0].name], profileQuery: 'select * from table', - profileSample: null, - profileSampleType: 'PERCENTAGE', includeColumns: [{ columnName: table.entity?.columns[1].name }], partitioning: { partitionColumnName: table.entity?.columns[2].name, @@ -395,7 +398,9 @@ test.describe( await expect( page.locator('[data-testid="profile-sample"]') ).toBeVisible(); - await expect(page.locator('[data-testid="slider-input"]')).toBeEmpty(); + await expect( + page.locator('[data-testid="slider-input"]') + ).not.toBeVisible(); await expect( page.getByTestId('profile-sample').locator('div') 
).toBeVisible(); diff --git a/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/RestoreEntityInheritedFields.spec.ts b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/RestoreEntityInheritedFields.spec.ts index d2632b4224e..6127b4727d0 100644 --- a/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/RestoreEntityInheritedFields.spec.ts +++ b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/RestoreEntityInheritedFields.spec.ts @@ -36,8 +36,8 @@ import { } from '../../utils/entity'; import { test } from '../fixtures/pages'; -const domain = new Domain(); -const dataProduct = new DataProduct([domain]); +let domain: Domain; +let dataProduct: DataProduct; const entities = [ ApiEndpointClass, @@ -54,6 +54,9 @@ const entities = [ ] as const; test.beforeAll('setup test', async ({ browser }) => { + domain = new Domain(); + dataProduct = new DataProduct([domain]); + const { afterAction, apiContext } = await performAdminLogin(browser); await domain.create(apiContext); await dataProduct.create(apiContext); diff --git a/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/MySqlIngestionClass.ts b/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/MySqlIngestionClass.ts index 4f50c059ef8..704f61f51f4 100644 --- a/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/MySqlIngestionClass.ts +++ b/openmetadata-ui/src/main/resources/ui/playwright/support/entity/ingestion/MySqlIngestionClass.ts @@ -143,8 +143,11 @@ class MysqlIngestionClass extends ServiceBaseClass { await page.click('[data-menu-id*="profiler"]'); - await page.locator('#root\\/profileSample').waitFor(); - await page.fill('#root\\/profileSample', '10'); + await page.getByTestId('profile-sample-input').waitFor(); + await page + .getByTestId('profile-sample-input') + .locator('input') + .fill('10'); await page.click('[data-testid="submit-btn"]'); // Make sure we create ingestion with None schedule to avoid 
conflict between Airflow and Argo behavior await this.scheduleIngestion(page); diff --git a/openmetadata-ui/src/main/resources/ui/src/components/Database/Profiler/ProfilerSettings/ProfilerSettings.tsx b/openmetadata-ui/src/main/resources/ui/src/components/Database/Profiler/ProfilerSettings/ProfilerSettings.tsx index 4e6ae498907..8637c95419a 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/Database/Profiler/ProfilerSettings/ProfilerSettings.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/components/Database/Profiler/ProfilerSettings/ProfilerSettings.tsx @@ -110,17 +110,27 @@ const ProfilerSettings: FC = ({ } }; + const profileSampleType = + profilerConfig?.profileSampleConfig?.config?.profileSampleType ?? + ProfileSampleType.Percentage; + const uiSchema = useMemo( () => ({ - 'ui:order': ['profileSampleType', '*'], - profileSample: { - 'ui:widget': - profilerConfig?.profileSampleType === ProfileSampleType.Percentage - ? 'range' - : 'updown', + profileSampleConfig: { + 'ui:order': ['sampleConfigType', 'config', '*'], + sampleConfigType: { 'ui:widget': 'hidden' }, + config: { + 'ui:order': ['profileSampleType', 'profileSample', '*'], + profileSample: { + 'ui:widget': + profileSampleType === ProfileSampleType.Percentage + ? 
'range' + : 'updown', + }, + }, }, }), - [profilerConfig] + [profileSampleType] ); useEffect(() => { diff --git a/openmetadata-ui/src/main/resources/ui/src/components/Database/Profiler/TableProfiler/ProfilerSettingsModal/ProfilerSettingsModal.tsx b/openmetadata-ui/src/main/resources/ui/src/components/Database/Profiler/TableProfiler/ProfilerSettingsModal/ProfilerSettingsModal.tsx index 9e685d8b8d8..d5df9b14da9 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/Database/Profiler/TableProfiler/ProfilerSettingsModal/ProfilerSettingsModal.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/components/Database/Profiler/TableProfiler/ProfilerSettingsModal/ProfilerSettingsModal.tsx @@ -56,6 +56,7 @@ import { CSMode } from '../../../../../enums/codemirror.enum'; import { PartitionIntervalTypes, ProfileSampleType, + SampleConfigType, TableProfilerConfig, } from '../../../../../generated/entity/data/table'; import { @@ -176,11 +177,13 @@ const ProfilerSettingsModal: React.FC = ({ includeColumns, partitioning, profileQuery, - profileSample, - profileSampleType, excludeColumns, sampleDataCount, + profileSampleConfig, } = tableProfilerConfig; + const staticConfig = profileSampleConfig?.config; + const profileSample = staticConfig?.profileSample; + const profileSampleType = staticConfig?.profileSampleType; handleStateChange({ sqlQuery: profileQuery ?? '', profileSample: profileSample, @@ -293,17 +296,25 @@ const ProfilerSettingsModal: React.FC = ({ sampleDataCount, } = data; + const profileSample = profileSampleType + ? profileSampleType === ProfileSampleType.Percentage + ? profileSamplePercentage + : profileSampleRows + : undefined; + const profileConfig: TableProfilerConfig = { excludeColumns: excludeCol.length > 0 ? excludeCol : undefined, profileQuery: !isEmpty(sqlQuery) ? sqlQuery : undefined, - profileSample: profileSampleType - ? profileSampleType === ProfileSampleType.Percentage - ? 
profileSamplePercentage - : profileSampleRows - : undefined, - profileSampleType: isUndefined(profileSampleType) - ? undefined - : profileSampleType, + profileSampleConfig: + profileSampleType && profileSample + ? { + sampleConfigType: SampleConfigType.Static, + config: { + profileSample, + profileSampleType, + }, + } + : undefined, includeColumns: !isEqual(includeCol, DEFAULT_INCLUDE_PROFILE) ? getIncludesColumns() : undefined, diff --git a/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/IngestionWorkflowForm.tsx b/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/IngestionWorkflowForm.tsx index 36a24bb0f6e..80cc3ae8380 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/IngestionWorkflowForm.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/IngestionWorkflowForm.tsx @@ -11,7 +11,7 @@ * limitations under the License. 
*/ import Form, { IChangeEvent } from '@rjsf/core'; -import { RegistryFieldsType } from '@rjsf/utils'; +import { RegistryFieldsType, UiSchema } from '@rjsf/utils'; import { customizeValidator } from '@rjsf/validator-ajv8'; import { Button, Space } from 'antd'; import classNames from 'classnames'; @@ -39,6 +39,7 @@ import DescriptionFieldTemplate from '../../../../common/Form/JSONSchema/JSONSch import { FieldErrorTemplate } from '../../../../common/Form/JSONSchema/JSONSchemaTemplate/FieldErrorTemplate/FieldErrorTemplate'; import { ObjectFieldTemplate } from '../../../../common/Form/JSONSchema/JSONSchemaTemplate/ObjectFieldTemplate'; import WorkflowArrayFieldTemplate from '../../../../common/Form/JSONSchema/JSONSchemaTemplate/WorkflowArrayFieldTemplate'; +import ProfileSampleConfigField from './ProfileSampleConfigField'; const IngestionWorkflowForm: FC = ({ pipeLineType, @@ -78,7 +79,7 @@ const IngestionWorkflowForm: FC = ({ serviceData?.connection?.config?.supportsIncrementalMetadataExtraction; const uiSchema = useMemo(() => { - let commonSchema = { ...INGESTION_WORKFLOW_UI_SCHEMA }; + let commonSchema: UiSchema = { ...INGESTION_WORKFLOW_UI_SCHEMA }; if (isElasticSearchPipeline) { commonSchema = { ...commonSchema, @@ -93,6 +94,15 @@ const IngestionWorkflowForm: FC = ({ }; } + if (pipeLineType === PipelineType.Profiler) { + commonSchema = { + ...commonSchema, + profileSampleConfig: { + 'ui:field': 'ProfileSampleConfigField', + }, + }; + } + return commonSchema; }, [pipeLineType, operationType]); @@ -142,6 +152,10 @@ const IngestionWorkflowForm: FC = ({ fields['/schemas/rootProcessingEngine'] = SparkAgentField; } + if (pipeLineType === PipelineType.Profiler) { + fields['ProfileSampleConfigField'] = ProfileSampleConfigField; + } + return fields; }, [pipeLineType]); diff --git a/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/ProfileSampleConfigField.test.tsx 
b/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/ProfileSampleConfigField.test.tsx new file mode 100644 index 00000000000..6cdecb59878 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/ProfileSampleConfigField.test.tsx @@ -0,0 +1,501 @@ +/* + * Copyright 2026 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { FieldProps, IdSchema, Registry } from '@rjsf/utils'; +import { fireEvent, render, screen } from '@testing-library/react'; +import { + ProfileSampleConfig, + ProfileSampleType, + SampleConfigType, + SamplingMethodType, +} from '../../../../../generated/metadataIngestion/databaseServiceProfilerPipeline'; +import ProfileSampleConfigField from './ProfileSampleConfigField'; + +jest.mock('@untitledui/icons', () => ({ + Plus: () => null, + Trash01: () => null, +})); + +jest.mock('@openmetadata/ui-core-components', () => { + const CardHeader = ({ + title, + extra, + }: { + title?: React.ReactNode; + extra?: React.ReactNode; + }) => ( +
+ {title} + {extra} +
+ ); + const CardContent = ({ children }: { children?: React.ReactNode }) => ( +
{children}
+ ); + const CardMock = Object.assign( + ({ + children, + className, + }: { + children?: React.ReactNode; + className?: string; + }) =>
{children}
, + { Content: CardContent, Header: CardHeader } + ); + + const GridItem = ({ children }: { children?: React.ReactNode }) => ( +
{children}
+ ); + const GridMock = Object.assign( + ({ + children, + className, + }: { + children?: React.ReactNode; + className?: string; + }) =>
{children}
, + { Item: GridItem } + ); + + const SelectItem = ({ children }: { children?: React.ReactNode }) => ( +
{children}
+ ); + const SelectMock = Object.assign( + ({ 'data-testid': testId }: { 'data-testid'?: string }) => ( +
+ ), + { Item: SelectItem } + ); + + return { + Button: ({ + children, + onClick, + 'data-testid': testId, + }: { + children?: React.ReactNode; + onClick?: () => void; + 'data-testid'?: string; + iconLeading?: React.ComponentType; + color?: string; + size?: string; + }) => ( + + ), + Card: CardMock, + Grid: GridMock, + Input: ({ + 'data-testid': testId, + value, + onChange, + type, + }: { + 'data-testid'?: string; + value?: string; + onChange?: (value: string) => void; + type?: string; + className?: string; + }) => ( + onChange?.(e.target.value)} + /> + ), + Select: SelectMock, + Typography: ({ + children, + className, + }: { + children?: React.ReactNode; + size?: string; + weight?: string; + className?: string; + as?: React.ElementType; + }) => {children}, + }; +}); + +const mockOnChange = jest.fn(); + +const baseFieldProps: FieldProps = { + autofocus: false, + disabled: false, + formContext: {}, + formData: undefined, + hideError: undefined, + id: 'root/profileSampleConfig', + name: 'profileSampleConfig', + idSchema: { $id: 'root/profileSampleConfig' } as IdSchema, + idSeparator: '/', + schema: { type: 'object', title: 'Profile Sample Config' }, + uiSchema: {}, + readonly: false, + required: false, + rawErrors: undefined, + onChange: mockOnChange, + onBlur: jest.fn(), + onFocus: jest.fn(), + registry: {} as Registry, +}; + +const staticFormData: ProfileSampleConfig = { + sampleConfigType: SampleConfigType.Static, + config: { + profileSample: 80, + profileSampleType: ProfileSampleType.Percentage, + samplingMethodType: SamplingMethodType.Bernoulli, + }, +}; + +const dynamicFormData: ProfileSampleConfig = { + sampleConfigType: SampleConfigType.Dynamic, + config: { + thresholds: [ + { + rowCountThreshold: 1000000, + profileSample: 10, + profileSampleType: ProfileSampleType.Percentage, + samplingMethodType: SamplingMethodType.Bernoulli, + }, + ], + }, +}; + +describe('ProfileSampleConfigField', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + 
describe('Default (STATIC) rendering', () => { + it('renders the sample-config-type selector', () => { + render(); + + expect( + screen.getByTestId('sample-config-type-select') + ).toBeInTheDocument(); + expect(screen.getByText('label.sample-config-type')).toBeInTheDocument(); + }); + + it('shows static config fields when sampleConfigType is STATIC', () => { + render( + + ); + + expect(screen.getByTestId('profile-sample-input')).toBeInTheDocument(); + expect( + screen.getByTestId('profile-sample-type-select') + ).toBeInTheDocument(); + expect( + screen.getByTestId('sampling-method-type-select') + ).toBeInTheDocument(); + }); + + it('shows static config fields by default when no formData is provided', () => { + render(); + + expect(screen.getByTestId('profile-sample-input')).toBeInTheDocument(); + expect( + screen.getByTestId('profile-sample-type-select') + ).toBeInTheDocument(); + expect( + screen.getByTestId('sampling-method-type-select') + ).toBeInTheDocument(); + }); + + it('does not show dynamic threshold section in STATIC mode', () => { + render( + + ); + + expect(screen.queryByTestId('add-threshold-btn')).not.toBeInTheDocument(); + expect( + screen.queryByText('label.threshold-plural') + ).not.toBeInTheDocument(); + }); + + it('displays the profile-sample input', () => { + render( + + ); + + expect(screen.getByTestId('profile-sample-input')).toBeInTheDocument(); + }); + }); + + describe('DYNAMIC mode rendering', () => { + it('shows the thresholds section when sampleConfigType is DYNAMIC', () => { + render( + + ); + + expect(screen.getByText('label.threshold-plural')).toBeInTheDocument(); + expect(screen.getByTestId('add-threshold-btn')).toBeInTheDocument(); + }); + + it('does not show static config fields in DYNAMIC mode', () => { + render( + + ); + + expect( + screen.queryByTestId('profile-sample-input') + ).not.toBeInTheDocument(); + expect( + screen.queryByTestId('profile-sample-type-select') + ).not.toBeInTheDocument(); + expect( + 
screen.queryByTestId('sampling-method-type-select') + ).not.toBeInTheDocument(); + }); + + it('renders a threshold card for each threshold in formData', () => { + render( + + ); + + expect(screen.getByText('label.threshold 1')).toBeInTheDocument(); + expect(screen.getByTestId('row-count-threshold-0')).toBeInTheDocument(); + expect(screen.getByTestId('profile-sample-0')).toBeInTheDocument(); + expect(screen.getByTestId('profile-sample-type-0')).toBeInTheDocument(); + expect(screen.getByTestId('sampling-method-type-0')).toBeInTheDocument(); + }); + + it('renders multiple threshold cards when multiple thresholds exist', () => { + const multiThresholdData: ProfileSampleConfig = { + sampleConfigType: SampleConfigType.Dynamic, + config: { + thresholds: [ + { rowCountThreshold: 1000000, profileSample: 10 }, + { rowCountThreshold: 500000, profileSample: 20 }, + ], + }, + }; + + render( + + ); + + expect(screen.getByText('label.threshold 1')).toBeInTheDocument(); + expect(screen.getByText('label.threshold 2')).toBeInTheDocument(); + expect(screen.getByTestId('row-count-threshold-0')).toBeInTheDocument(); + expect(screen.getByTestId('row-count-threshold-1')).toBeInTheDocument(); + }); + + it('renders the remove button for each threshold', () => { + render( + + ); + + expect(screen.getByTestId('remove-threshold-0')).toBeInTheDocument(); + }); + + it('shows empty threshold list with only the add button when thresholds array is empty', () => { + const emptyDynamic: ProfileSampleConfig = { + sampleConfigType: SampleConfigType.Dynamic, + config: { thresholds: [] }, + }; + + render( + + ); + + expect( + screen.queryByTestId('row-count-threshold-0') + ).not.toBeInTheDocument(); + expect(screen.getByTestId('add-threshold-btn')).toBeInTheDocument(); + }); + }); + + describe('Add threshold interaction', () => { + it('calls onChange with a new default threshold when add button is clicked', () => { + const emptyDynamic: ProfileSampleConfig = { + sampleConfigType: 
SampleConfigType.Dynamic, + config: { thresholds: [] }, + }; + + render( + + ); + + fireEvent.click(screen.getByTestId('add-threshold-btn')); + + expect(mockOnChange).toHaveBeenCalledWith({ + sampleConfigType: SampleConfigType.Dynamic, + config: { + thresholds: [{ rowCountThreshold: 1, profileSample: 100 }], + }, + }); + }); + + it('appends a new threshold to existing thresholds when add is clicked', () => { + render( + + ); + + fireEvent.click(screen.getByTestId('add-threshold-btn')); + + expect(mockOnChange).toHaveBeenCalledWith({ + sampleConfigType: SampleConfigType.Dynamic, + config: { + thresholds: [ + { + rowCountThreshold: 1000000, + profileSample: 10, + profileSampleType: ProfileSampleType.Percentage, + samplingMethodType: SamplingMethodType.Bernoulli, + }, + { rowCountThreshold: 1, profileSample: 100 }, + ], + }, + }); + }); + }); + + describe('Remove threshold interaction', () => { + it('calls onChange with the threshold removed when remove button is clicked', () => { + render( + + ); + + fireEvent.click(screen.getByTestId('remove-threshold-0')); + + expect(mockOnChange).toHaveBeenCalledWith({ + sampleConfigType: SampleConfigType.Dynamic, + config: { thresholds: [] }, + }); + }); + + it('removes the correct threshold when one of many is deleted', () => { + const multiThresholdData: ProfileSampleConfig = { + sampleConfigType: SampleConfigType.Dynamic, + config: { + thresholds: [ + { rowCountThreshold: 1000000, profileSample: 10 }, + { rowCountThreshold: 500000, profileSample: 20 }, + ], + }, + }; + + render( + + ); + + fireEvent.click(screen.getByTestId('remove-threshold-0')); + + expect(mockOnChange).toHaveBeenCalledWith({ + sampleConfigType: SampleConfigType.Dynamic, + config: { + thresholds: [{ rowCountThreshold: 500000, profileSample: 20 }], + }, + }); + }); + }); + + describe('Config type rendering', () => { + it('shows static fields when formData has STATIC type', () => { + render( + + ); + + 
expect(screen.getByTestId('profile-sample-input')).toBeInTheDocument(); + expect(screen.queryByTestId('add-threshold-btn')).not.toBeInTheDocument(); + }); + + it('shows dynamic fields when formData has DYNAMIC type', () => { + render( + + ); + + expect(screen.getByTestId('add-threshold-btn')).toBeInTheDocument(); + expect( + screen.queryByTestId('profile-sample-input') + ).not.toBeInTheDocument(); + }); + }); + + describe('Label rendering', () => { + it('renders all field labels in STATIC mode', () => { + render( + + ); + + expect(screen.getByText('label.profile-sample')).toBeInTheDocument(); + expect(screen.getByText('label.profile-sample-type')).toBeInTheDocument(); + expect( + screen.getByText('label.sampling-method-type') + ).toBeInTheDocument(); + }); + + it('renders all field labels in DYNAMIC threshold card', () => { + render( + + ); + + expect(screen.getByText('label.row-count-threshold')).toBeInTheDocument(); + expect(screen.getByText('label.profile-sample')).toBeInTheDocument(); + expect(screen.getByText('label.profile-sample-type')).toBeInTheDocument(); + expect( + screen.getByText('label.sampling-method-type') + ).toBeInTheDocument(); + }); + }); +}); diff --git a/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/ProfileSampleConfigField.tsx b/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/ProfileSampleConfigField.tsx new file mode 100644 index 00000000000..ad55f117365 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/Settings/Services/Ingestion/IngestionWorkflowForm/ProfileSampleConfigField.tsx @@ -0,0 +1,340 @@ +/* + * Copyright 2026 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { + Button, + Card, + Grid, + Input, + Select, + Typography, +} from '@openmetadata/ui-core-components'; +import { FieldProps } from '@rjsf/utils'; +import { Plus, Trash01 } from '@untitledui/icons'; +import { useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; +import { + ICSamplingConfig, + ProfileSampleConfig, + ProfileSampleType, + SampleConfigType, + SamplingMethodType, + Threshold, +} from '../../../../../generated/metadataIngestion/databaseServiceProfilerPipeline'; + +const SAMPLE_CONFIG_TYPE_OPTIONS = [ + { id: SampleConfigType.Static, label: 'STATIC' }, + { id: SampleConfigType.Dynamic, label: 'DYNAMIC' }, +]; + +const PROFILE_SAMPLE_TYPE_OPTIONS = [ + { id: ProfileSampleType.Percentage, label: 'PERCENTAGE' }, + { id: ProfileSampleType.Rows, label: 'ROWS' }, +]; + +const SAMPLING_METHOD_TYPE_OPTIONS = [ + { id: SamplingMethodType.Bernoulli, label: 'BERNOULLI' }, + { id: SamplingMethodType.System, label: 'SYSTEM' }, +]; + +const DEFAULT_THRESHOLD: Threshold = { + rowCountThreshold: 1, + profileSample: 100, +}; + +const ProfileSampleConfigField = (props: FieldProps) => { + const { formData, onChange } = props; + const { t } = useTranslation(); + + const sampleConfigType = + formData?.sampleConfigType ?? SampleConfigType.Static; + const config: ICSamplingConfig = formData?.config ?? {}; + + const handleConfigTypeChange = useCallback( + (type: string | number | null) => { + const newConfig: ICSamplingConfig = + type === SampleConfigType.Dynamic ? 
{ thresholds: [] } : {}; + onChange({ + sampleConfigType: type as SampleConfigType, + config: newConfig, + }); + }, + [onChange] + ); + + const handleStaticFieldChange = useCallback( + ( + field: keyof ICSamplingConfig, + value: ICSamplingConfig[keyof ICSamplingConfig] + ) => { + onChange({ sampleConfigType, config: { ...config, [field]: value } }); + }, + [sampleConfigType, config, onChange] + ); + + const handleThresholdChange = useCallback( + ( + index: number, + field: keyof Threshold, + value: Threshold[keyof Threshold] + ) => { + const thresholds = [...(config.thresholds ?? [])]; + thresholds[index] = { ...thresholds[index], [field]: value }; + onChange({ sampleConfigType, config: { thresholds } }); + }, + [sampleConfigType, config, onChange] + ); + + const handleAddThreshold = useCallback(() => { + const thresholds = [...(config.thresholds ?? []), { ...DEFAULT_THRESHOLD }]; + onChange({ sampleConfigType, config: { thresholds } }); + }, [sampleConfigType, config, onChange]); + + const handleRemoveThreshold = useCallback( + (index: number) => { + const thresholds = (config.thresholds ?? []).filter( + (_, i) => i !== index + ); + onChange({ sampleConfigType, config: { thresholds } }); + }, + [sampleConfigType, config, onChange] + ); + + return ( +
+
+ {t('label.sample-config-type')} + +
+ + {sampleConfigType === SampleConfigType.Static && ( + + +
+ + {t('label.profile-sample')} + + + handleStaticFieldChange( + 'profileSample', + value !== '' ? Number(value) : undefined + ) + } + /> +
+
+ +
+ + {t('label.profile-sample-type')} + + +
+
+ +
+ + {t('label.sampling-method-type')} + + +
+
+
+ )} + + {sampleConfigType === SampleConfigType.Dynamic && ( +
+ + {t('label.threshold-plural')} + + {(config.thresholds ?? []).map((threshold, index) => ( + + handleRemoveThreshold(index)} + /> + } + title={`${t('label.threshold')} ${index + 1}`} + /> + + + +
+ + {t('label.row-count-threshold')} + + + handleThresholdChange( + index, + 'rowCountThreshold', + Number(value) || 1 + ) + } + /> +
+
+ +
+ + {t('label.profile-sample')} + + + handleThresholdChange( + index, + 'profileSample', + Number(value) || 0 + ) + } + /> +
+
+ +
+ + {t('label.profile-sample-type')} + + +
+
+ +
+ + {t('label.sampling-method-type')} + + +
+
+
+
+
+ ))} + +
+ )} +
+ ); +}; + +export default ProfileSampleConfigField; diff --git a/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts b/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts index e2a9650fa41..fc57abb3c71 100644 --- a/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts +++ b/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts @@ -494,7 +494,7 @@ export const ADVANCED_PROPERTIES = [ 'includeViews', 'useStatistics', 'confidence', - 'samplingMethodType', + 'profileSampleConfig', 'randomizedSample', 'sampleDataCount', 'threadCount', diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/data/bulkCreateTable.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/data/bulkCreateTable.ts index 948cbd4c879..f134996fa9c 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/data/bulkCreateTable.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/data/bulkCreateTable.ts @@ -990,13 +990,8 @@ export interface TableProfilerConfig { /** * Users' raw SQL query to fetch sample data and profile the table */ - profileQuery?: string; - /** - * Percentage of data or no. of rows used to compute the profiler metrics and run data - * quality tests - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + profileQuery?: string; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. */ @@ -1004,8 +999,7 @@ export interface TableProfilerConfig { /** * Number of sample rows to ingest when 'Generate Sample Data' is enabled */ - sampleDataCount?: number; - samplingMethodType?: SamplingMethodType; + sampleDataCount?: number; /** * Table Specific configuration for Profiling it with a Spark Engine. It is ignored for * other engines. @@ -1077,6 +1071,38 @@ export enum PartitionIntervalUnit { Year = "YEAR", } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. 
+ */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + /** * Type of Profile Sample (percentage or rows) */ @@ -1093,6 +1119,28 @@ export enum SamplingMethodType { System = "SYSTEM", } +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Table Specific configuration for Profiling it with a Spark Engine. It is ignored for * other engines. 
diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createEntityProfile.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createEntityProfile.ts index 556f2a20386..b161376989a 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createEntityProfile.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createEntityProfile.ts @@ -57,8 +57,7 @@ export interface Profile { /** * No.of rows in the table. This is always executed on the whole table. */ - rowCount?: number; - samplingMethodType?: SamplingMethodType; + rowCount?: number; /** * Table size in GB */ @@ -258,14 +257,6 @@ export enum ProfileSampleType { Rows = "ROWS", } -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} - /** * type of profile * diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createTable.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createTable.ts index 7bfae351997..1b5de39b778 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createTable.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createTable.ts @@ -976,13 +976,8 @@ export interface TableProfilerConfig { /** * Users' raw SQL query to fetch sample data and profile the table */ - profileQuery?: string; - /** - * Percentage of data or no. of rows used to compute the profiler metrics and run data - * quality tests - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + profileQuery?: string; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. 
*/ @@ -990,8 +985,7 @@ export interface TableProfilerConfig { /** * Number of sample rows to ingest when 'Generate Sample Data' is enabled */ - sampleDataCount?: number; - samplingMethodType?: SamplingMethodType; + sampleDataCount?: number; /** * Table Specific configuration for Profiling it with a Spark Engine. It is ignored for * other engines. @@ -1063,6 +1057,38 @@ export enum PartitionIntervalUnit { Year = "YEAR", } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. + */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + /** * Type of Profile Sample (percentage or rows) */ @@ -1079,6 +1105,28 @@ export enum SamplingMethodType { System = "SYSTEM", } +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. 
+ */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Table Specific configuration for Profiling it with a Spark Engine. It is ignored for * other engines. diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createTableProfile.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createTableProfile.ts index 800b7542188..58cb910a9aa 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createTableProfile.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/data/createTableProfile.ts @@ -259,8 +259,7 @@ export interface TableProfile { /** * No.of rows in the table. This is always executed on the whole table. */ - rowCount?: number; - samplingMethodType?: SamplingMethodType; + rowCount?: number; /** * Table size in GB */ @@ -278,11 +277,3 @@ export enum ProfileSampleType { Percentage = "PERCENTAGE", Rows = "ROWS", } - -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts index da70b1dd6e4..a2bac271f0f 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts @@ -572,21 +572,13 @@ export interface Pipeline { /** * List of metrics to compute. If empty, then all metrics will be computed */ - metrics?: MetricType[]; - processingEngine?: ProcessingEngine; - /** - * Percentage of data or no. of rows used to compute the profiler metrics and run data - * quality tests - * - * Percentage of data or no. 
of rows we want to execute the profiler and tests on - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + metrics?: MetricType[]; + processingEngine?: ProcessingEngine; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. */ - randomizedSample?: boolean; - samplingMethodType?: SamplingMethodType; + randomizedSample?: boolean; /** * Number of threads to use during metric computations */ @@ -756,6 +748,12 @@ export interface Pipeline { * Fully qualified name of the entity to be tested, if we're working with a basic suite. */ entityFullyQualifiedName?: string; + /** + * Percentage of data or no. of rows we want to execute the profiler and tests on + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; /** * Service connections to be used for the logical test suite. */ @@ -2717,6 +2715,38 @@ export enum ProcessingEngineType { Spark = "Spark", } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. + */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. 
of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + /** * Type of Profile Sample (percentage or rows) */ @@ -2725,6 +2755,36 @@ export enum ProfileSampleType { Rows = "ROWS", } +/** + * Type of Sampling Method (BERNOULLI or SYSTEM) + */ +export enum SamplingMethodType { + Bernoulli = "BERNOULLI", + System = "SYSTEM", +} + +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Configuration for SQL query parser selection for lineage extraction. * @@ -2761,14 +2821,6 @@ export enum QueryParserType { SQLGlot = "SqlGlot", } -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} - /** * Service connections available for the logical test suite. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/database.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/database.ts index 83509b8c1cc..3706fc757e0 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/database.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/database.ts @@ -503,11 +503,7 @@ export interface EntityReference { * This schema defines the type for Database profile config. */ export interface DatabaseProfilerConfig { - /** - * Percentage of data or no. 
of rows we want to execute the profiler and tests on - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. */ @@ -517,10 +513,41 @@ export interface DatabaseProfilerConfig { */ sampleDataCount?: number; sampleDataStorageConfig?: SampleDataStorageConfig; - samplingMethodType?: SamplingMethodType; [property: string]: any; } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. + */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. 
of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + /** * Type of Profile Sample (percentage or rows) */ @@ -529,6 +556,36 @@ export enum ProfileSampleType { Rows = "ROWS", } +/** + * Type of Sampling Method (BERNOULLI or SYSTEM) + */ +export enum SamplingMethodType { + Bernoulli = "BERNOULLI", + System = "SYSTEM", +} + +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Storage config to store sample data */ @@ -613,14 +670,6 @@ export interface AwsCredentials { profileName?: string; } -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} - /** * Status of the Database. * diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/databaseSchema.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/databaseSchema.ts index 11cec637e75..d517dca7882 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/databaseSchema.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/databaseSchema.ts @@ -499,11 +499,7 @@ export interface EntityReference { * This schema defines the type for Schema profile config. */ export interface DatabaseSchemaProfilerConfig { - /** - * Percentage of data or no. 
of rows we want to execute the profiler and tests on - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. */ @@ -513,10 +509,41 @@ export interface DatabaseSchemaProfilerConfig { */ sampleDataCount?: number; sampleDataStorageConfig?: SampleDataStorageConfig; - samplingMethodType?: SamplingMethodType; [property: string]: any; } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. + */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. 
of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + /** * Type of Profile Sample (percentage or rows) */ @@ -525,6 +552,36 @@ export enum ProfileSampleType { Rows = "ROWS", } +/** + * Type of Sampling Method (BERNOULLI or SYSTEM) + */ +export enum SamplingMethodType { + Bernoulli = "BERNOULLI", + System = "SYSTEM", +} + +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Storage config to store sample data */ @@ -609,14 +666,6 @@ export interface AwsCredentials { profileName?: string; } -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} - /** * Status of the DatabaseSchema. * diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/table.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/table.ts index 774c05645f2..6474688f350 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/table.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/data/table.ts @@ -1309,8 +1309,7 @@ export interface TableProfile { /** * No.of rows in the table. This is always executed on the whole table. 
*/ - rowCount?: number; - samplingMethodType?: SamplingMethodType; + rowCount?: number; /** * Table size in GB */ @@ -1329,14 +1328,6 @@ export enum ProfileSampleType { Rows = "ROWS", } -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} - /** * Sample data for a table. * @@ -1518,13 +1509,8 @@ export interface TableProfilerConfig { /** * Users' raw SQL query to fetch sample data and profile the table */ - profileQuery?: string; - /** - * Percentage of data or no. of rows used to compute the profiler metrics and run data - * quality tests - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + profileQuery?: string; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. */ @@ -1532,8 +1518,7 @@ export interface TableProfilerConfig { /** * Number of sample rows to ingest when 'Generate Sample Data' is enabled */ - sampleDataCount?: number; - samplingMethodType?: SamplingMethodType; + sampleDataCount?: number; /** * Table Specific configuration for Profiling it with a Spark Engine. It is ignored for * other engines. @@ -1605,6 +1590,68 @@ export enum PartitionIntervalUnit { Year = "YEAR", } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. + */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). 
+ */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of Sampling Method (BERNOULLI or SYSTEM) + */ +export enum SamplingMethodType { + Bernoulli = "BERNOULLI", + System = "SYSTEM", +} + +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Table Specific configuration for Profiling it with a Spark Engine. It is ignored for * other engines. diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts index d3e13adeeac..14aec35ab85 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts @@ -1255,21 +1255,13 @@ export interface Pipeline { /** * List of metrics to compute. If empty, then all metrics will be computed */ - metrics?: MetricType[]; - processingEngine?: ProcessingEngine; - /** - * Percentage of data or no. of rows used to compute the profiler metrics and run data - * quality tests - * - * Percentage of data or no. 
of rows we want to execute the profiler and tests on - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + metrics?: MetricType[]; + processingEngine?: ProcessingEngine; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. */ - randomizedSample?: boolean; - samplingMethodType?: SamplingMethodType; + randomizedSample?: boolean; /** * Number of threads to use during metric computations */ @@ -1439,6 +1431,12 @@ export interface Pipeline { * Fully qualified name of the entity to be tested, if we're working with a basic suite. */ entityFullyQualifiedName?: string; + /** + * Percentage of data or no. of rows we want to execute the profiler and tests on + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; /** * Service connections to be used for the logical test suite. */ @@ -3301,6 +3299,38 @@ export enum ProcessingEngineType { Spark = "Spark", } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. + */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. 
of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + /** * Type of Profile Sample (percentage or rows) */ @@ -3309,6 +3339,36 @@ export enum ProfileSampleType { Rows = "ROWS", } +/** + * Type of Sampling Method (BERNOULLI or SYSTEM) + */ +export enum SamplingMethodType { + Bernoulli = "BERNOULLI", + System = "SYSTEM", +} + +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Configuration for SQL query parser selection for lineage extraction. * @@ -3345,14 +3405,6 @@ export enum QueryParserType { SQLGlot = "SqlGlot", } -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} - /** * Service connections available for the logical test suite. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/databaseServiceProfilerPipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/databaseServiceProfilerPipeline.ts index 72c2886cbed..9cf3ae76278 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/databaseServiceProfilerPipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/databaseServiceProfilerPipeline.ts @@ -40,19 +40,13 @@ export interface DatabaseServiceProfilerPipeline { /** * List of metrics to compute. 
If empty, then all metrics will be computed */ - metrics?: MetricType[]; - processingEngine?: ProcessingEngine; - /** - * Percentage of data or no. of rows used to compute the profiler metrics and run data - * quality tests - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + metrics?: MetricType[]; + processingEngine?: ProcessingEngine; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. */ - randomizedSample?: boolean; - samplingMethodType?: SamplingMethodType; + randomizedSample?: boolean; /** * Regex to only fetch tables or databases that matches the pattern. */ @@ -193,6 +187,38 @@ export enum Type { Spark = "Spark", } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. + */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. 
of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + /** * Type of Profile Sample (percentage or rows) */ @@ -209,6 +235,28 @@ export enum SamplingMethodType { System = "SYSTEM", } +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Pipeline type * diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts index f6cf8cfaa99..ea1b0b3649c 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts @@ -5439,21 +5439,13 @@ export interface Pipeline { /** * List of metrics to compute. If empty, then all metrics will be computed */ - metrics?: MetricType[]; - processingEngine?: ProcessingEngine; - /** - * Percentage of data or no. of rows used to compute the profiler metrics and run data - * quality tests - * - * Percentage of data or no. of rows we want to execute the profiler and tests on - */ - profileSample?: number; - profileSampleType?: ProfileSampleType; + metrics?: MetricType[]; + processingEngine?: ProcessingEngine; + profileSampleConfig?: ProfileSampleConfig; /** * Whether to randomize the sample data or not. 
*/ - randomizedSample?: boolean; - samplingMethodType?: SamplingMethodType; + randomizedSample?: boolean; /** * Number of threads to use during metric computations */ @@ -5623,6 +5615,12 @@ export interface Pipeline { * Fully qualified name of the entity to be tested, if we're working with a basic suite. */ entityFullyQualifiedName?: string; + /** + * Percentage of data or no. of rows we want to execute the profiler and tests on + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; /** * Service connections to be used for the logical test suite. */ @@ -7312,6 +7310,38 @@ export enum ProcessingEngineType { Spark = "Spark", } +/** + * Profile sample configuration supporting static and dynamic sampling strategies. + */ +export interface ProfileSampleConfig { + config?: ICSamplingConfig; + /** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ + sampleConfigType?: SampleConfigType; +} + +/** + * Configuration for dynamic sampling based on table row count. + * + * Configuration for static sampling based on table row count. + */ +export interface ICSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; + /** + * Percentage of data or no. 
of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + /** * Type of Profile Sample (percentage or rows) */ @@ -7320,6 +7350,36 @@ export enum ProfileSampleType { Rows = "ROWS", } +/** + * Type of Sampling Method (BERNOULLI or SYSTEM) + */ +export enum SamplingMethodType { + Bernoulli = "BERNOULLI", + System = "SYSTEM", +} + +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of sampling to apply. STATIC: fixed sample size. DYNAMIC: sample size determined at + * runtime based on row count thresholds. + */ +export enum SampleConfigType { + Dynamic = "DYNAMIC", + Static = "STATIC", +} + /** * Configuration for SQL query parser selection for lineage extraction. * @@ -7356,14 +7416,6 @@ export enum QueryParserType { SQLGlot = "SqlGlot", } -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} - /** * Service connections available for the logical test suite. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/type/dynamicSamplingConfig.ts b/openmetadata-ui/src/main/resources/ui/src/generated/type/dynamicSamplingConfig.ts new file mode 100644 index 00000000000..703fe731c5f --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/generated/type/dynamicSamplingConfig.ts @@ -0,0 +1,51 @@ +/* + * Copyright 2026 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Configuration for dynamic sampling based on table row count. + */ +export interface DynamicSamplingConfig { + /** + * Row count thresholds for sampling. Evaluated in order from highest to lowest threshold. + * Tables below the lowest threshold are profiled at 100% (no sampling). + */ + thresholds?: Threshold[]; +} + +export interface Threshold { + /** + * Sample percentage or row count to use for tables at or above this threshold + */ + profileSample: number; + profileSampleType?: ProfileSampleType; + /** + * Minimum row count for this tier to apply + */ + rowCountThreshold: number; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of Profile Sample (percentage or rows) + */ +export enum ProfileSampleType { + Percentage = "PERCENTAGE", + Rows = "ROWS", +} + +/** + * Type of Sampling Method (BERNOULLI or SYSTEM) + */ +export enum SamplingMethodType { + Bernoulli = "BERNOULLI", + System = "SYSTEM", +} diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/type/entityProfile.ts b/openmetadata-ui/src/main/resources/ui/src/generated/type/entityProfile.ts index 205b2183f50..e2e9ad9a483 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/type/entityProfile.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/type/entityProfile.ts @@ -117,8 +117,7 @@ export interface Profile { /** * No.of rows in the table. This is always executed on the whole table. 
*/ - rowCount?: number; - samplingMethodType?: SamplingMethodType; + rowCount?: number; /** * Table size in GB */ @@ -318,14 +317,6 @@ export enum ProfileSampleType { Rows = "ROWS", } -/** - * Type of Sampling Method (BERNOULLI or SYSTEM) - */ -export enum SamplingMethodType { - Bernoulli = "BERNOULLI", - System = "SYSTEM", -} - /** * type of profile * diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/type/staticSamplingConfig.ts b/openmetadata-ui/src/main/resources/ui/src/generated/type/staticSamplingConfig.ts new file mode 100644 index 00000000000..f68d8324d9d --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/generated/type/staticSamplingConfig.ts @@ -0,0 +1,40 @@ +/* + * Copyright 2026 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Configuration for static sampling based on table row count. + */ +export interface StaticSamplingConfig { + /** + * Percentage of data or no. 
of rows used to compute the profiler metrics and run data + * quality tests + */ + profileSample?: number; + profileSampleType?: ProfileSampleType; + samplingMethodType?: SamplingMethodType; +} + +/** + * Type of Profile Sample (percentage or rows) + */ +export enum ProfileSampleType { + Percentage = "PERCENTAGE", + Rows = "ROWS", +} + +/** + * Type of Sampling Method (BERNOULLI or SYSTEM) + */ +export enum SamplingMethodType { + Bernoulli = "BERNOULLI", + System = "SYSTEM", +} diff --git a/openmetadata-ui/src/main/resources/ui/src/jsons/profilerSettings.json b/openmetadata-ui/src/main/resources/ui/src/jsons/profilerSettings.json index a9544ca5efb..17952d4f1a1 100644 --- a/openmetadata-ui/src/main/resources/ui/src/jsons/profilerSettings.json +++ b/openmetadata-ui/src/main/resources/ui/src/jsons/profilerSettings.json @@ -3,18 +3,35 @@ "javaType": "org.openmetadata.schema.type.DatabaseSchemaProfilerConfig", "description": "This schema defines the type for Schema profile config.", "properties": { - "profileSample": { - "description": "Percentage of data or no. of rows we want to execute the profiler and tests on", - "type": "number", - "default": 100, - "title": "Profile Sample" - }, - "profileSampleType": { - "description": "Type of Profile Sample (percentage or rows)", - "type": "string", - "enum": ["PERCENTAGE", "ROWS"], - "default": "PERCENTAGE", - "title": "Profile Sample Value" + "profileSampleConfig": { + "title": "Profile Sample Configuration", + "type": "object", + "properties": { + "sampleConfigType": { + "type": "string", + "enum": ["STATIC", "DYNAMIC"], + "default": "STATIC" + }, + "config": { + "title": "Sampling Configuration", + "type": "object", + "properties": { + "profileSampleType": { + "description": "Type of Profile Sample (percentage or rows)", + "type": "string", + "enum": ["PERCENTAGE", "ROWS"], + "default": "PERCENTAGE", + "title": "Profile Sample Value" + }, + "profileSample": { + "description": "Percentage of data or no. 
of rows we want to execute the profiler and tests on", + "type": "number", + "default": 100, + "title": "Profile Sample" + } + } + } + } }, "sampleDataCount": { "description": "Number of row of sample data to be generated", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ar-sa.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ar-sa.json index 45e9db94307..1de43916991 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ar-sa.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ar-sa.json @@ -650,6 +650,7 @@ "drive-plural": "محركات الأقراص", "duplicate": "تكرار", "duration": "المدة", + "dynamic": "ديناميكي", "dynamic-assertion": "تأكيد ديناميكي", "edge": "حافة", "edge-bundling": "تجميع الحواف", @@ -1568,6 +1569,7 @@ "profile-config": "تكوين ملف التعريف", "profile-lowercase": "ملف تعريف", "profile-name": "اسم ملف التعريف", + "profile-sample": "عينة الملف الشخصي", "profile-sample-type": "عينة ملف التعريف {{type}}", "profiler": "محلل البيانات", "profiler-configuration": "تكوين محلل البيانات", @@ -1724,6 +1726,7 @@ "row": "صف", "row-count": "عدد الصفوف", "row-count-lowercase": "عدد الصفوف", + "row-count-threshold": "حد عدد الصفوف", "row-filter": "تصفية الصف", "row-filter-plural": "تصفية الصفوف", "row-limit": "حد الصف", @@ -1748,10 +1751,12 @@ "runs-for": "تشغيل لـ", "s3-config-source": "مصدر تكوين S3", "sample": "عينة", + "sample-config-type": "نوع تكوين العينة", "sample-data": "بيانات العينة", "sample-data-count": "عدد بيانات العينة", "sample-data-count-lowercase": "عدد بيانات العينة", "sample-data-ingestion-configuration": "إعداد استيعاب بيانات العينة", + "sampling-method-type": "نوع طريقة أخذ العينات", "saturday": "السبت", "save": "حفظ", "save-changes": "حفظ التغييرات", @@ -1939,6 +1944,7 @@ "started-following": "بدأ المتابعة", "starting-offset": "إزاحة البدء", "state": "الحالة", + "static": "ثابت", "status": "الحالة", "stay-up-to-date": "ابقَ على اطلاع", "step": "خطوة", @@ -2088,6 +2094,8 @@ 
"thread-plural-lowercase": "سلاسل", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "حد", + "threshold-plural": "حدود", "thursday": "الخميس", "tier": "مستوى", "tier-label-type": "نوع تسمية المستوى", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json index 6e007c89dc7..277045152dd 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json @@ -650,6 +650,7 @@ "drive-plural": "Laufwerke", "duplicate": "Duplikat", "duration": "Dauer", + "dynamic": "Dynamisch", "dynamic-assertion": "Dynamische Assertion", "edge": "Kante", "edge-bundling": "Kantenbündelung", @@ -1568,6 +1569,7 @@ "profile-config": "Profil-Konfiguration", "profile-lowercase": "profil", "profile-name": "Name des Profils", + "profile-sample": "Profilstichprobe", "profile-sample-type": "Profil-Sample-Typ {{type}}", "profiler": "Profiler", "profiler-configuration": "Profiler-Konfiguration", @@ -1724,6 +1726,7 @@ "row": "Zeile", "row-count": "Zeilenzahl", "row-count-lowercase": "Anzahl der Zeilen", + "row-count-threshold": "Zeilenanzahl-Schwellenwert", "row-filter": "Zeilenfilter", "row-filter-plural": "Zeilenfilter", "row-limit": "Zeilenlimit", @@ -1748,10 +1751,12 @@ "runs-for": "Läuft für", "s3-config-source": "S3-Konfigurationsquelle", "sample": "Beispiel", + "sample-config-type": "Stichproben-Konfigurationstyp", "sample-data": "Beispieldaten", "sample-data-count": "Beispieldatenanzahl", "sample-data-count-lowercase": "Anzahl der Beispieldaten", "sample-data-ingestion-configuration": "Konfiguration der Beispieldaten-Aufnahme", + "sampling-method-type": "Stichprobenverfahrenstyp", "saturday": "Samstag", "save": "Speichern", "save-changes": "Änderungen speichern", @@ -1939,6 +1944,7 @@ "started-following": "Hat begonnen zu folgen", "starting-offset": "Start-Offset", "state": "Zustand", 
+ "static": "Statisch", "status": "Status", "stay-up-to-date": "Bleiben Sie auf dem neuesten Stand", "step": "Schritt", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "Threads", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Schwellenwert", + "threshold-plural": "Schwellenwerte", "thursday": "Donnerstag", "tier": "Stufe", "tier-label-type": "Stufen-Etikettentyp", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json index 1d3c8b4903a..6693e37c072 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json @@ -650,6 +650,7 @@ "drive-plural": "Drives", "duplicate": "Duplicate", "duration": "Duration", + "dynamic": "Dynamic", "dynamic-assertion": "Dynamic Assertion", "edge": "Edge", "edge-bundling": "Edge Bundling", @@ -1568,7 +1569,8 @@ "profile-config": "Profile config", "profile-lowercase": "profile", "profile-name": "Profile Name", - "profile-sample-type": "Profile Sample {{type}}", + "profile-sample": "Profile Sample", + "profile-sample-type": "Profile Sample Type", "profiler": "Profiler", "profiler-configuration": "Profiler Configuration", "profiler-ingestion": "Profiler Ingestion", @@ -1724,6 +1726,7 @@ "row": "Row", "row-count": "Row Count", "row-count-lowercase": "row count", + "row-count-threshold": "Row Count Threshold", "row-filter": "Row Filter", "row-filter-plural": "Row Filters", "row-limit": "Row Limit", @@ -1748,10 +1751,12 @@ "runs-for": "Runs for", "s3-config-source": "S3 Config Source", "sample": "Sample", + "sample-config-type": "Sample Config Type", "sample-data": "Sample Data", "sample-data-count": "Sample Data Count", "sample-data-count-lowercase": "sample data count", "sample-data-ingestion-configuration": "Sample Data Ingestion Configuration", + "sampling-method-type": "Sampling Method Type", "saturday": "Saturday",
"save": "Save", "save-changes": "Save changes", @@ -1939,6 +1944,7 @@ "started-following": "Started following", "starting-offset": "Starting Offset", "state": "State", + "static": "Static", "status": "Status", "stay-up-to-date": "Stay Up-to-date", "step": "Step", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "threads", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Threshold", + "threshold-plural": "Thresholds", "thursday": "Thursday", "tier": "Tier", "tier-label-type": "Tier Label Type", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json index 0e8020d0b08..ca3115e3622 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json @@ -650,6 +650,7 @@ "drive-plural": "Unidades", "duplicate": "Duplicar", "duration": "Duración", + "dynamic": "Dinámico", "dynamic-assertion": "Afirmación dinámica", "edge": "Arista", "edge-bundling": "Agrupación de Aristas", @@ -1568,6 +1569,7 @@ "profile-config": "Configuración del perfil", "profile-lowercase": "perfil", "profile-name": "Nombre del perfil", + "profile-sample": "Muestra de perfil", "profile-sample-type": "Muestra de perfil {{type}}", "profiler": "Perfilador", "profiler-configuration": "Configuración del Perfilador", @@ -1724,6 +1726,7 @@ "row": "Fila", "row-count": "Conteo Fila", "row-count-lowercase": "número de filas", + "row-count-threshold": "Umbral de recuento de filas", "row-filter": "Filtro de fila", "row-filter-plural": "Filtros de fila", "row-limit": "Límite de filas", @@ -1748,10 +1751,12 @@ "runs-for": "Ejecuciones para", "s3-config-source": "Fuente de Configuración S3", "sample": "Muestra", + "sample-config-type": "Tipo de configuración de muestra", "sample-data": "Datos de Muestra", "sample-data-count": "Número de datos de muestra", "sample-data-count-lowercase": "número de datos de 
muestra", "sample-data-ingestion-configuration": "Configuración de Ingesta de Datos de Ejemplo", + "sampling-method-type": "Tipo de método de muestreo", "saturday": "Sábado", "save": "Guardar", "save-changes": "Guardar cambios", @@ -1939,6 +1944,7 @@ "started-following": "Comenzó a seguir", "starting-offset": "Desplazamiento inicial", "state": "Estado", + "static": "Estático", "status": "Estado", "stay-up-to-date": "Manténgase Actualizado", "step": "Paso", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "hilos", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Umbral", + "threshold-plural": "Umbrales", "thursday": "Jueves", "tier": "Nivel", "tier-label-type": "Tipo de Etiqueta de Nivel", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json index 6061bf2e703..5af5333bd19 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json @@ -650,6 +650,7 @@ "drive-plural": "Lecteurs", "duplicate": "Dupliquer", "duration": "Durée", + "dynamic": "Dynamique", "dynamic-assertion": "Assertion Dynamique", "edge": "Bord", "edge-bundling": "Regroupement des Arêtes", @@ -1568,6 +1569,7 @@ "profile-config": "Configuration de Profil", "profile-lowercase": "profil", "profile-name": "Nom du Profil", + "profile-sample": "Échantillon de profil", "profile-sample-type": "Échantillon du Profil {{type}}", "profiler": "Profilage", "profiler-configuration": "Configuration du profileur", @@ -1724,6 +1726,7 @@ "row": "Ligne", "row-count": "Nombre de Lignes", "row-count-lowercase": "Nombre de Ligne", + "row-count-threshold": "Seuil de nombre de lignes", "row-filter": "Filtre de ligne", "row-filter-plural": "Filtres de ligne", "row-limit": "Limite de lignes", @@ -1748,10 +1751,12 @@ "runs-for": "Exécutions pour", "s3-config-source": "Source de Configuration S3", "sample": 
"Échantillon", + "sample-config-type": "Type de configuration d'échantillon", "sample-data": "Échantillon de Données", "sample-data-count": "Nombre de données Echantillon", "sample-data-count-lowercase": "nombre de données échantillon", "sample-data-ingestion-configuration": "Configuration d'Ingestion des Données d'Exemple", + "sampling-method-type": "Type de méthode d'échantillonnage", "saturday": "Samedi", "save": "Enregistrer", "save-changes": "Enregistrer les modifications", @@ -1939,6 +1944,7 @@ "started-following": "A commencé à suivre", "starting-offset": "Décalage de départ", "state": "État", + "static": "Statique", "status": "Statut", "stay-up-to-date": "Rester à Jour", "step": "Étape", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "fils", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Seuil", + "threshold-plural": "Seuils", "thursday": "Jeudi", "tier": "Niveau", "tier-label-type": "Type d'Étiquette de Niveau", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json index 104543b3a51..dbefb35a694 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json @@ -650,6 +650,7 @@ "drive-plural": "Unidades", "duplicate": "Duplicar", "duration": "Duración", + "dynamic": "Dinámico", "dynamic-assertion": "Aserción dinámica", "edge": "Borde", "edge-bundling": "Agrupamento de Arestas", @@ -1568,6 +1569,7 @@ "profile-config": "Configuración do perfil", "profile-lowercase": "perfil", "profile-name": "Nome do perfil", + "profile-sample": "Mostra de perfil", "profile-sample-type": "Perfil de mostra {{type}}", "profiler": "Perfilador", "profiler-configuration": "Configuración do perfilador", @@ -1724,6 +1726,7 @@ "row": "Fila", "row-count": "Reconto de filas", "row-count-lowercase": "reconto de filas", + "row-count-threshold": "Limiar de contaxe de 
filas", "row-filter": "Filtro de fila", "row-filter-plural": "Filtros de fila", "row-limit": "Límite de filas", @@ -1748,10 +1751,12 @@ "runs-for": "Executado para", "s3-config-source": "Fonte de configuración S3", "sample": "Mostra", + "sample-config-type": "Tipo de configuración de mostra", "sample-data": "Datos de mostra", "sample-data-count": "Reconto de datos de mostra", "sample-data-count-lowercase": "reconto de datos de mostra", "sample-data-ingestion-configuration": "Configuración de Inxesta de Datos de Exemplo", + "sampling-method-type": "Tipo de método de mostraxe", "saturday": "Sábado", "save": "Gardar", "save-changes": "Gardar cambios", @@ -1939,6 +1944,7 @@ "started-following": "Comezou a seguir", "starting-offset": "Desprazamento inicial", "state": "Estado", + "static": "Estático", "status": "Estado", "stay-up-to-date": "Mantente ao día", "step": "Paso", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "fíos", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Limiar", + "threshold-plural": "Limiares", "thursday": "Xoves", "tier": "Nivel", "tier-label-type": "Tipo de Etiqueta de Nivel", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json index 391a3112539..4b8611409c1 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json @@ -650,6 +650,7 @@ "drive-plural": "כוננים", "duplicate": "שכפל", "duration": "משך זמן", + "dynamic": "דינמי", "dynamic-assertion": "אסרציה דינמית", "edge": "קצה", "edge-bundling": "צרור קשתות", @@ -1568,6 +1569,7 @@ "profile-config": "תצורת פרופיל", "profile-lowercase": "פרופיל", "profile-name": "שם הפרופיל", + "profile-sample": "דגימת פרופיל", "profile-sample-type": "דוגמת פרופיל {{type}}", "profiler": "מדד ואיכות נתונים", "profiler-configuration": "תצורת פרופיילר", @@ -1724,6 +1726,7 @@ "row": "שורה", 
"row-count": "מספר שורות", "row-count-lowercase": "מספר שורות", + "row-count-threshold": "סף ספירת שורות", "row-filter": "מסנן שורות", "row-filter-plural": "מסנני שורות", "row-limit": "מגבלת שורות", @@ -1748,10 +1751,12 @@ "runs-for": "ריצות עבור", "s3-config-source": "מקור הגדרות S3", "sample": "דוגמה", + "sample-config-type": "סוג תצורת דגימה", "sample-data": "נתוני דוגמה", "sample-data-count": "מספר נתוני דוגמה", "sample-data-count-lowercase": "מספר נתוני דוגמה", "sample-data-ingestion-configuration": "הגדרת קליטת נתוני דוגמה", + "sampling-method-type": "סוג שיטת דגימה", "saturday": "יום שבת", "save": "שמור", "save-changes": "שמירת שינויים", @@ -1939,6 +1944,7 @@ "started-following": "התחיל לעקוב", "starting-offset": "היסט התחלתי", "state": "מצב", + "static": "סטטי", "status": "סטטוס", "stay-up-to-date": "הישאר מעודכן", "step": "שלב", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "נושאים", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "סף", + "threshold-plural": "ספים", "thursday": "יום חמישי", "tier": "שכבת מידע", "tier-label-type": "סוג תווית שכבת מידע", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json index 2d9dc87ebd8..ee674389ee6 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json @@ -650,6 +650,7 @@ "drive-plural": "ドライブ", "duplicate": "重複", "duration": "所要時間", + "dynamic": "動的", "dynamic-assertion": "動的アサーション", "edge": "エッジ", "edge-bundling": "エッジバンドリング", @@ -1568,6 +1569,7 @@ "profile-config": "プロファイル設定", "profile-lowercase": "プロファイル", "profile-name": "プロファイル名", + "profile-sample": "プロファイルサンプル", "profile-sample-type": "サンプル {{type}} のプロファイル", "profiler": "プロファイラー", "profiler-configuration": "プロファイラー設定", @@ -1724,6 +1726,7 @@ "row": "行", "row-count": "行数", "row-count-lowercase": "行数", + "row-count-threshold": "行数しきい値", 
"row-filter": "行フィルター", "row-filter-plural": "行フィルター", "row-limit": "行数制限", @@ -1748,10 +1751,12 @@ "runs-for": "対象:", "s3-config-source": "S3 設定ソース", "sample": "サンプル", + "sample-config-type": "サンプル設定タイプ", "sample-data": "サンプルデータ", "sample-data-count": "サンプルデータ件数", "sample-data-count-lowercase": "サンプルデータ件数", "sample-data-ingestion-configuration": "サンプルデータ取り込み設定", + "sampling-method-type": "サンプリング方法タイプ", "saturday": "土曜日", "save": "保存", "save-changes": "変更を保存", @@ -1939,6 +1944,7 @@ "started-following": "フォローを開始しました", "starting-offset": "開始オフセット", "state": "状態", + "static": "静的", "status": "ステータス", "stay-up-to-date": "常に最新情報を取得", "step": "ステップ", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "スレッド", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "しきい値", + "threshold-plural": "しきい値", "thursday": "木曜日", "tier": "ティア", "tier-label-type": "ティアラベルタイプ", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ko-kr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ko-kr.json index 216fd1c3aba..805f7188324 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ko-kr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ko-kr.json @@ -650,6 +650,7 @@ "drive-plural": "드라이브", "duplicate": "복제", "duration": "기간", + "dynamic": "동적", "dynamic-assertion": "동적 단언", "edge": "엣지", "edge-bundling": "엣지 번들링", @@ -1568,6 +1569,7 @@ "profile-config": "프로필 구성", "profile-lowercase": "프로필", "profile-name": "프로필 이름", + "profile-sample": "프로파일 샘플", "profile-sample-type": "프로필 샘플 {{type}}", "profiler": "프로파일러", "profiler-configuration": "프로파일러 구성", @@ -1724,6 +1726,7 @@ "row": "행", "row-count": "행 수", "row-count-lowercase": "행 수", + "row-count-threshold": "행 수 임계값", "row-filter": "행 필터", "row-filter-plural": "행 필터", "row-limit": "행 제한", @@ -1748,10 +1751,12 @@ "runs-for": "실행 기간", "s3-config-source": "S3 설정 소스", "sample": "샘플", + "sample-config-type": "샘플 구성 유형", "sample-data": "샘플 데이터", 
"sample-data-count": "샘플 데이터 수", "sample-data-count-lowercase": "샘플 데이터 수", "sample-data-ingestion-configuration": "샘플 데이터 수집 구성", + "sampling-method-type": "샘플링 방법 유형", "saturday": "토요일", "save": "저장", "save-changes": "변경 사항 저장", @@ -1939,6 +1944,7 @@ "started-following": "팔로우 시작됨", "starting-offset": "시작 오프셋", "state": "상태", + "static": "정적", "status": "현황", "stay-up-to-date": "최신 정보 유지", "step": "단계", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "스레드들", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "임계값", + "threshold-plural": "임계값", "thursday": "목요일", "tier": "계층", "tier-label-type": "계층 라벨 유형", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/mr-in.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/mr-in.json index 9491534acf9..1e0bf3c6c86 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/mr-in.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/mr-in.json @@ -650,6 +650,7 @@ "drive-plural": "ड्राइव्हस्", "duplicate": "डुप्लिकेट", "duration": "कालावधी", + "dynamic": "गतिमान", "dynamic-assertion": "डायनॅमिक अॅसर्शन", "edge": "काठ", "edge-bundling": "कड्यांचे गटबंधन", @@ -1568,6 +1569,7 @@ "profile-config": "प्रोफाइल कॉन्फिग", "profile-lowercase": "प्रोफाइल", "profile-name": "प्रोफाइल नाव", + "profile-sample": "प्रोफाइल नमुना", "profile-sample-type": "प्रोफाइल नमुना {{type}}", "profiler": "प्रोफाइलर", "profiler-configuration": "प्रोफाइलर संरचना", @@ -1724,6 +1726,7 @@ "row": "पंक्ति", "row-count": "Row Count", "row-count-lowercase": "पंक्ति संख्या", + "row-count-threshold": "पंक्ती संख्या उंबरठा", "row-filter": "पंक्ती फिल्टर", "row-filter-plural": "पंक्ती फिल्टर", "row-limit": "पंक्ती मर्यादा", @@ -1748,10 +1751,12 @@ "runs-for": "साठी चालते", "s3-config-source": "S3 कॉन्फिग स्रोत", "sample": "नमुना", + "sample-config-type": "नमुना कॉन्फिग प्रकार", "sample-data": "नमुना डेटा", "sample-data-count": "नमुना डेटा संख्या", "sample-data-count-lowercase": "नमुना 
डेटा संख्या", "sample-data-ingestion-configuration": "नमुना डेटा अंतर्ग्रहण कॉन्फिगरेशन", + "sampling-method-type": "नमुना पद्धत प्रकार", "saturday": "शनिवार", "save": "जतन करा", "save-changes": "बदल जतन करा", @@ -1939,6 +1944,7 @@ "started-following": "अनुसरण करणे प्रारंभ केले", "starting-offset": "प्रारंभिक ऑफसेट", "state": "अवस्था", + "static": "स्थिर", "status": "स्थिती", "stay-up-to-date": "अप-टू-डेट रहा", "step": "पाऊल", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "थ्रेड्स", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "उंबरठा", + "threshold-plural": "उंबरठे", "thursday": "गुरुवार", "tier": "स्तर", "tier-label-type": "स्तर लेबल प्रकार", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json index 1783dbacd37..cbc20838c9b 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json @@ -650,6 +650,7 @@ "drive-plural": "Schijven", "duplicate": "Dupliceren", "duration": "Duur", + "dynamic": "Dynamisch", "dynamic-assertion": "Dynamic Assertion", "edge": "Verbinding", "edge-bundling": "Kantenbundeling", @@ -1568,6 +1569,7 @@ "profile-config": "Profielconfiguratie", "profile-lowercase": "profiel", "profile-name": "Profielnaam", + "profile-sample": "Profielsteekproef", "profile-sample-type": "Voorbeeldprofiel {{type}}", "profiler": "Profiler", "profiler-configuration": "Profiler Configuration", @@ -1724,6 +1726,7 @@ "row": "Rij", "row-count": "Aantal rijen", "row-count-lowercase": "aantal rijen", + "row-count-threshold": "Drempelwaarde rijenaantal", "row-filter": "Rijfilter", "row-filter-plural": "Rijfilters", "row-limit": "Rijlimiet", @@ -1748,10 +1751,12 @@ "runs-for": "Uitgevoerd voor", "s3-config-source": "S3 Configuratiebron", "sample": "Voorbeeld", + "sample-config-type": "Type steekproefconfiguratie", "sample-data": "Voorbeelddata", 
"sample-data-count": "Aantal voorbeelddata", "sample-data-count-lowercase": "aantal voorbeelddata", "sample-data-ingestion-configuration": "Configuratie van Voorbeeldgegevens Opname", + "sampling-method-type": "Type steekproefmethode", "saturday": "zaterdag", "save": "Opslaan", "save-changes": "Wijzigingen opslaan", @@ -1939,6 +1944,7 @@ "started-following": "Begonnen met volgen", "starting-offset": "Startoffset", "state": "Toestand", + "static": "Statisch", "status": "Status", "stay-up-to-date": "Blijf Up-to-date", "step": "Stap", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "draadjes", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Drempelwaarde", + "threshold-plural": "Drempelwaarden", "thursday": "donderdag", "tier": "Niveau", "tier-label-type": "Niveau-etikettype", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json index 4a3aaf40012..59542b33cba 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json @@ -650,6 +650,7 @@ "drive-plural": "درایوها", "duplicate": "تکراری", "duration": "مدت زمان", + "dynamic": "پویا", "dynamic-assertion": "ادعای پویا", "edge": "لبه", "edge-bundling": "دسته‌بندی یال‌ها", @@ -1568,6 +1569,7 @@ "profile-config": "پیکربندی پروفایل", "profile-lowercase": "پروفایل", "profile-name": "نام پروفایل", + "profile-sample": "نمونه پروفایل", "profile-sample-type": "نوع نمونه پروفایل {{type}}", "profiler": "پروفایلر", "profiler-configuration": "پیکربندی پروفایلر", @@ -1724,6 +1726,7 @@ "row": "ردیف", "row-count": "Row Count", "row-count-lowercase": "تعداد ردیف", + "row-count-threshold": "آستانه تعداد ردیف", "row-filter": "ਕਤਾਰ ਫਿਲਟਰ", "row-filter-plural": "ਕਤਾਰ ਫਿਲਟਰ", "row-limit": "ਕਤਾਰ ਸੀਮਾ", @@ -1748,10 +1751,12 @@ "runs-for": "اجرا می‌شود برای", "s3-config-source": "منبع پیکربندی S3",
"sample": "نمونه", + "sample-config-type": "نوع پیکربندی نمونه", "sample-data": "داده‌های نمونه", "sample-data-count": "تعداد داده‌های نمونه", "sample-data-count-lowercase": "تعداد داده‌های نمونه", "sample-data-ingestion-configuration": "Sample Data Ingestion Configuration", + "sampling-method-type": "نوع روش نمونه‌گیری", "saturday": "شنبه", "save": "ذخیره", "save-changes": "تبدیلیاں محفوظ کریں", @@ -1939,6 +1944,7 @@ "started-following": "شروع به دنبال کردن شد", "starting-offset": "آفست شروع", "state": "Estado", + "static": "ایستا", "status": "وضعیت", "stay-up-to-date": "در جریان بمانید", "step": "مرحله", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "رشته‌ها", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "آستانه", + "threshold-plural": "آستانه‌ها", "thursday": "پنج‌شنبه", "tier": "سطح", "tier-label-type": "نوع برچسب سطح", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json index ab0ef1be645..825956dcfab 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json @@ -650,6 +650,7 @@ "drive-plural": "Unidades", "duplicate": "Duplicar", "duration": "Duração", + "dynamic": "Dinâmico", "dynamic-assertion": "Asserção dinâmica", "edge": "Borda", "edge-bundling": "Agrupamento de Arestas", @@ -1568,6 +1569,7 @@ "profile-config": "Configuração de Perfil", "profile-lowercase": "perfil", "profile-name": "Nome do Perfil", + "profile-sample": "Amostra de perfil", "profile-sample-type": "Amostra de Perfil {{type}}", "profiler": "Criador de perfil", "profiler-configuration": "Configuração do Profiler", @@ -1724,6 +1726,7 @@ "row": "Linha", "row-count": "Contagem de linhas", "row-count-lowercase": "contagem de linhas", + "row-count-threshold": "Limiar de contagem de linhas", "row-filter": "Filtro de linha", "row-filter-plural":
"Filtros de linha", "row-limit": "Limite de linhas", @@ -1748,10 +1751,12 @@ "runs-for": "Executa por", "s3-config-source": "Fonte de Configuração S3", "sample": "Amostra", + "sample-config-type": "Tipo de configuração de amostra", "sample-data": "Dados de Amostra", "sample-data-count": "Contagem de Dados de Amostra", "sample-data-count-lowercase": "contagem de dados de amostra", "sample-data-ingestion-configuration": "Configuração de Ingestão de Dados de Exemplo", + "sampling-method-type": "Tipo de método de amostragem", "saturday": "Sábado", "save": "Salvar", "save-changes": "Salvar alterações", @@ -1939,6 +1944,7 @@ "started-following": "Começou a seguir", "starting-offset": "Deslocamento inicial", "state": "Estado", + "static": "Estático", "status": "Status", "stay-up-to-date": "Mantenha-se Atualizado", "step": "Etapa", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "tópicos", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Limiar", + "threshold-plural": "Limiares", "thursday": "Quinta-feira", "tier": "Camada", "tier-label-type": "Tipo de Rótulo de Camada", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json index e183a1e6c9f..b056822ec6d 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json @@ -650,6 +650,7 @@ "drive-plural": "Unidades", "duplicate": "Duplicar", "duration": "Duração", + "dynamic": "Dinâmico", "dynamic-assertion": "Asserção Dinâmica", "edge": "Borda", "edge-bundling": "Agrupamento de Arestas", @@ -1568,6 +1569,7 @@ "profile-config": "Configuração de Perfil", "profile-lowercase": "perfil", "profile-name": "Nome do Perfil", + "profile-sample": "Amostra de perfil", "profile-sample-type": "Amostra de Perfil {{type}}", "profiler": "Profiler", "profiler-configuration": "Configuração do Profiler", @@ -1724,6 +1726,7 @@ 
"row": "Linha", "row-count": "Contagem de linhas", "row-count-lowercase": "contagem de linhas", + "row-count-threshold": "Limiar de contagem de linhas", "row-filter": "Filtro de linha", "row-filter-plural": "Filtros de linha", "row-limit": "Limite de linhas", @@ -1748,10 +1751,12 @@ "runs-for": "Executa por", "s3-config-source": "Fonte de Configuração S3", "sample": "Amostra", + "sample-config-type": "Tipo de configuração de amostra", "sample-data": "Dados de Amostra", "sample-data-count": "Contagem de Dados de Amostra", "sample-data-count-lowercase": "contagem de dados de amostra", "sample-data-ingestion-configuration": "Configuração de Ingestão de Dados de Exemplo", + "sampling-method-type": "Tipo de método de amostragem", "saturday": "Sábado", "save": "Salvar", "save-changes": "Guardar alterações", @@ -1939,6 +1944,7 @@ "started-following": "Começou a seguir", "starting-offset": "Offset Inicial", "state": "Estado", + "static": "Estático", "status": "Estado", "stay-up-to-date": "Mantenha-se Atualizado", "step": "Passo", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "discussões", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Limiar", + "threshold-plural": "Limiares", "thursday": "Quinta-feira", "tier": "Camada", "tier-label-type": "Tipo de Etiqueta de Camada", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json index 80c05f13336..66c041dd7bf 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json @@ -650,6 +650,7 @@ "drive-plural": "Диски", "duplicate": "Дубликат", "duration": "Длительность", + "dynamic": "Динамический", "dynamic-assertion": "Динамическое утверждение", "edge": "связь", "edge-bundling": "Группировка рёбер", @@ -1568,6 +1569,7 @@ "profile-config": "Конфигурация профиля", "profile-lowercase": "профиль", "profile-name": 
"Имя профиля", + "profile-sample": "Выборка профиля", "profile-sample-type": "Образец профиля {{type}}", "profiler": "Профайлер", "profiler-configuration": "Конфигурация профайлера", @@ -1724,6 +1726,7 @@ "row": "Строка", "row-count": "Количество строк", "row-count-lowercase": "количество строк", + "row-count-threshold": "Порог количества строк", "row-filter": "Фильтр строк", "row-filter-plural": "Фильтры строк", "row-limit": "Лимит строк", @@ -1748,10 +1751,12 @@ "runs-for": "Запущено для", "s3-config-source": "Источник конфигурации S3", "sample": "Пример", + "sample-config-type": "Тип конфигурации выборки", "sample-data": "Пример данных", "sample-data-count": "Количество примеров данных", "sample-data-count-lowercase": "количество примеров данных", "sample-data-ingestion-configuration": "Настройка загрузки примеров данных", + "sampling-method-type": "Тип метода выборки", "saturday": "Суббота", "save": "Сохранить", "save-changes": "Сохранить изменения", @@ -1939,6 +1944,7 @@ "started-following": "Начало отслеживания", "starting-offset": "Начальное смещение", "state": "Состояние", + "static": "Статический", "status": "Статус", "stay-up-to-date": "Будьте в курсе последних событий", "step": "Шаг", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "ветки", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Порог", + "threshold-plural": "Пороговые значения", "thursday": "Четверг", "tier": "Критичность", "tier-label-type": "Тип метки уровня", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json index 93882508141..ce983d32599 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json @@ -650,6 +650,7 @@ "drive-plural": "ไดรฟ์", "duplicate": "ซ้ำกัน", "duration": "ระยะเวลา", + "dynamic": "ไดนามิก", "dynamic-assertion": "การยืนยันแบบไดนามิก", "edge": 
"ขอบ", "edge-bundling": "การรวมกลุ่มเส้นเชื่อม", @@ -1568,6 +1569,7 @@ "profile-config": "การกำหนดค่าโปรไฟล์", "profile-lowercase": "โปรไฟล์", "profile-name": "ชื่อโปรไฟล์", + "profile-sample": "ตัวอย่างโปรไฟล์", "profile-sample-type": "ตัวอย่างโปรไฟล์ {{type}}", "profiler": "โปรไฟล์เลอร์", "profiler-configuration": "การกำหนดค่าโปรไฟล์เลอร์", @@ -1724,6 +1726,7 @@ "row": "แถว", "row-count": "จำนวนแถว", "row-count-lowercase": "จำนวนแถว", + "row-count-threshold": "เกณฑ์จำนวนแถว", "row-filter": "ตัวกรองแถว", "row-filter-plural": "ตัวกรองแถว", "row-limit": "จำกัดแถว", @@ -1748,10 +1751,12 @@ "runs-for": "รันสำหรับ", "s3-config-source": "แหล่งที่มาของการตั้งค่า S3", "sample": "ตัวอย่าง", + "sample-config-type": "ประเภทการกำหนดค่าตัวอย่าง", "sample-data": "ข้อมูลตัวอย่าง", "sample-data-count": "จำนวนข้อมูลตัวอย่าง", "sample-data-count-lowercase": "จำนวนข้อมูลตัวอย่าง", "sample-data-ingestion-configuration": "การกำหนดค่าการนำเข้าข้อมูลตัวอย่าง", + "sampling-method-type": "ประเภทวิธีการสุ่มตัวอย่าง", "saturday": "วันเสาร์", "save": "บันทึก", "save-changes": "บันทึกการเปลี่ยนแปลง", @@ -1939,6 +1944,7 @@ "started-following": "เริ่มติดตาม", "starting-offset": "ออฟเซ็ตเริ่มต้น", "state": "สถานะ", + "static": "สถิต", "status": "สถานะ", "stay-up-to-date": "อัปเดตอยู่เสมอ", "step": "ขั้นตอน", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "กระทู้หลายรายการ", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "เกณฑ์", + "threshold-plural": "เกณฑ์", "thursday": "วันพฤหัสบดี", "tier": "ระดับ", "tier-label-type": "ประเภทป้ายระดับ", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/tr-tr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/tr-tr.json index c3f5940ab7b..0d320bc6436 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/tr-tr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/tr-tr.json @@ -650,6 +650,7 @@ "drive-plural": "Sürücüler", "duplicate": "Kopya", "duration": "Süre", + 
"dynamic": "Dinamik", "dynamic-assertion": "Dinamik Beyan", "edge": "Kenar", "edge-bundling": "Kenar Gruplama", @@ -1568,6 +1569,7 @@ "profile-config": "Profil yapılandırması", "profile-lowercase": "profil", "profile-name": "Profil Adı", + "profile-sample": "Profil Örneği", "profile-sample-type": "Profil Örneği {{type}}", "profiler": "Profilleyici", "profiler-configuration": "Profilleyici Yapılandırması", @@ -1724,6 +1726,7 @@ "row": "Satır", "row-count": "Satır Sayısı", "row-count-lowercase": "satır sayısı", + "row-count-threshold": "Satır Sayısı Eşiği", "row-filter": "Satır filtresi", "row-filter-plural": "Satır filtreleri", "row-limit": "Satır Limiti", @@ -1748,10 +1751,12 @@ "runs-for": "Şunun için çalışır", "s3-config-source": "S3 Yapılandırma Kaynağı", "sample": "Örnek", + "sample-config-type": "Örnek Yapılandırma Türü", "sample-data": "Örnek Veri", "sample-data-count": "Örnek Veri Sayısı", "sample-data-count-lowercase": "örnek veri sayısı", "sample-data-ingestion-configuration": "Örnek Veri Alım Yapılandırması", + "sampling-method-type": "Örnekleme Yöntemi Türü", "saturday": "Cumartesi", "save": "Kaydet", "save-changes": "Değişiklikleri kaydet", @@ -1939,6 +1944,7 @@ "started-following": "Takip etmeye başladı", "starting-offset": "Başlangıç Ofseti", "state": "Durum", + "static": "Statik", "status": "Durum", "stay-up-to-date": "Güncel Kalın", "step": "Adım", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "konu başlıkları", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "Eşik", + "threshold-plural": "Eşikler", "thursday": "Perşembe", "tier": "Katman", "tier-label-type": "Katman Etiketi Türü", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json index d415c2d9d44..49d6ff3a788 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json @@ 
-650,6 +650,7 @@ "drive-plural": "驱动器", "duplicate": "复制", "duration": "持续时间", + "dynamic": "动态", "dynamic-assertion": "动态断言", "edge": "连线", "edge-bundling": "边捆绑", @@ -1568,6 +1569,7 @@ "profile-config": "分析器配置", "profile-lowercase": "分析", "profile-name": "分析器名称", + "profile-sample": "分析样本", "profile-sample-type": "分析样本{{type}}", "profiler": "分析器", "profiler-configuration": "分析器配置", @@ -1724,6 +1726,7 @@ "row": "行", "row-count": "行数", "row-count-lowercase": "行计数", + "row-count-threshold": "行数阈值", "row-filter": "行过滤器", "row-filter-plural": "行过滤器", "row-limit": "行数限制", @@ -1748,10 +1751,12 @@ "runs-for": "运行时长", "s3-config-source": "S3 配置源", "sample": "样本", + "sample-config-type": "采样配置类型", "sample-data": "样本数据", "sample-data-count": "样本数据计数", "sample-data-count-lowercase": "样本数据计数", "sample-data-ingestion-configuration": "示例数据采集配置", + "sampling-method-type": "采样方法类型", "saturday": "星期六", "save": "保存", "save-changes": "保存更改", @@ -1939,6 +1944,7 @@ "started-following": "开始关注", "starting-offset": "起始偏移量", "state": "状态", + "static": "静态", "status": "状态", "stay-up-to-date": "保持最新", "step": "步骤", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "线程", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "阈值", + "threshold-plural": "阈值", "thursday": "星期四", "tier": "分级", "tier-label-type": "分级标签类型", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-tw.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-tw.json index 38268aa50e6..90cb968d4c6 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-tw.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-tw.json @@ -650,6 +650,7 @@ "drive-plural": "驅動器", "duplicate": "重複", "duration": "持續時間", + "dynamic": "動態", "dynamic-assertion": "動態斷言", "edge": "邊緣", "edge-bundling": "邊捆綁", @@ -1568,6 +1569,7 @@ "profile-config": "設定檔組態", "profile-lowercase": "設定檔", "profile-name": "設定檔名稱", + "profile-sample": "設定檔樣本", "profile-sample-type":
"設定檔範例 {{type}}", "profiler": "分析器", "profiler-configuration": "分析器組態", @@ -1724,6 +1726,7 @@ "row": "資料列", "row-count": "資料列計數", "row-count-lowercase": "資料列計數", + "row-count-threshold": "資料列計數閾值", "row-filter": "列篩選器", "row-filter-plural": "列篩選器", "row-limit": "列數限制", @@ -1748,10 +1751,12 @@ "runs-for": "執行於", "s3-config-source": "S3 組態來源", "sample": "範例", + "sample-config-type": "抽樣設定類型", "sample-data": "範例資料", "sample-data-count": "範例資料計數", "sample-data-count-lowercase": "範例資料計數", "sample-data-ingestion-configuration": "範例資料擷取設定", + "sampling-method-type": "抽樣方法類型", "saturday": "星期六", "save": "儲存", "save-changes": "儲存更改", @@ -1939,6 +1944,7 @@ "started-following": "開始追蹤", "starting-offset": "起始偏移量", "state": "狀態", + "static": "靜態", "status": "狀態", "stay-up-to-date": "保持最新", "step": "步驟", @@ -2088,6 +2094,8 @@ "thread-plural-lowercase": "對話", "three-dash-symbol": "---", "three-dots-symbol": "•••", + "threshold": "閾值", + "threshold-plural": "閾值", "thursday": "星期四", "tier": "層級", "tier-label-type": "層級標籤類型",