Fix payload size issue (#27388)

* Fix Payload Size issue, increase buffer

* Handle single entity with > 10 MB

* Single entity push

* Normalize SQL Queries

* Update generated TypeScript types

* Add Tests

* Fix Failing Test

* Revert fixes

* Fix Tests

* Strip Lineage

* Strip lineage and make default 9 MB

* Add Warn log on Large entity size

* Review Comments

* Remove hierarchical fields

* remove team containing users

* revert unwanted changes

* Fix test failures from payload size default change

- Update mock expectations in SearchIndexExecutorControlFlowTest and
  DistributedJobParticipantTest to use DEFAULT_BULK_PAYLOAD_SIZE_BYTES
  instead of hardcoded 104857600L (old 100MB default)
- Remove "charts" from DashboardIndex excluded fields — charts are needed
  for search filters and column lineage resolution

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix hardcoded payload size fallback in DistributedJobParticipant

Replace hardcoded 104857600L with SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES
to use the centralized 9MB default consistently.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Spotless

* Update payLoadSize schema defaults to 9MB

The JSON schema default for payLoadSize was 104857600 (100MB), which
meant EventPublisherJob.getPayLoadSize() always returned 100MB instead
of null, bypassing the DEFAULT_BULK_PAYLOAD_SIZE_BYTES fallback in
DistributedJobParticipant. Align schema defaults with the 9MB bulk
payload limit.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Address review comments: fix log message, reuse getLineageData, fix test key algorithm

- Fix misleading log in stripLineageForSize: report post-strip size, not
  imply bytes removed
- Reuse getLineageData() in populateLineageData() instead of duplicating
  the DAO call and edge construction loop
- Fix AddUpdateLineageScriptTest key algorithm to use maxKey+1 matching
  the Painless script, avoiding key collisions after deletions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Address review: equals() in Painless, prune stale SQL keys, flush oversized single ops

- Use .equals() instead of == for string comparison in REMOVE_LINEAGE_SCRIPT
  Painless to be explicit about value equality and null-safe (params on left)
- Prune orphaned sqlQueryKey in ADD_UPDATE_LINEAGE when updating an edge
  with a different SQL query, preventing unbounded lineageSqlQueries growth
- Restore currentBufferSize >= maxPayloadSizeBytes check in
  CustomBulkProcessor.add() so a single oversized operation is flushed
  immediately rather than sitting in the buffer
- Update AddUpdateLineageScriptTest to mirror the new pruning logic

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mohit Yadav 2026-04-15 16:59:26 +05:30 committed by GitHub
parent e7b2d2681e
commit 08e52b96c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
114 changed files with 1415 additions and 315 deletions

View file

@ -46,6 +46,7 @@ import org.openmetadata.service.apps.bundles.searchIndex.stats.StatsResult;
import org.openmetadata.service.exception.EntityNotFoundException;
import org.openmetadata.service.exception.SearchIndexException;
import org.openmetadata.service.search.ReindexContext;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.SearchRepository;
import org.openmetadata.service.search.elasticsearch.ElasticSearchClient;
import org.openmetadata.service.search.elasticsearch.EsUtils;
@ -95,6 +96,7 @@ public class ElasticSearchBulkSink implements BulkSink {
private final ElasticSearchClient searchClient;
protected final SearchRepository searchRepository;
private final long maxPayloadSizeBytes;
private final CustomBulkProcessor bulkProcessor;
private final StepStats stats = new StepStats();
@ -133,6 +135,7 @@ public class ElasticSearchBulkSink implements BulkSink {
this.searchClient = (ElasticSearchClient) searchRepository.getSearchClient();
this.batchSize = batchSize;
this.maxConcurrentRequests = maxConcurrentRequests;
this.maxPayloadSizeBytes = maxPayloadSizeBytes;
// Initialize stats
stats.withTotalRecords(0).withSuccessRecords(0).withFailedRecords(0);
@ -294,7 +297,7 @@ public class ElasticSearchBulkSink implements BulkSink {
return null;
}
private static final int BULK_OPERATION_METADATA_OVERHEAD = 50;
private static final int BULK_OPERATION_METADATA_OVERHEAD = 150;
private void addEntity(
EntityInterface entity, String indexName, boolean recreateIndex, StageStatsTracker tracker) {
@ -303,16 +306,53 @@ public class ElasticSearchBulkSink implements BulkSink {
Object searchIndexDoc = Entity.buildSearchIndex(entityType, entity).buildSearchIndexDoc();
String json = JsonUtils.pojoToJson(searchIndexDoc);
String docId = entity.getId().toString();
long estimatedSize =
(long) json.getBytes(StandardCharsets.UTF_8).length + BULK_OPERATION_METADATA_OVERHEAD;
long rawDocSize = (long) json.getBytes(StandardCharsets.UTF_8).length;
long estimatedSize = rawDocSize + BULK_OPERATION_METADATA_OVERHEAD;
if (rawDocSize > 1024 * 1024) {
LOG.warn(
"Large indexed doc: entityType={}, docId={}, size={}MB",
entityType,
docId,
rawDocSize / (1024 * 1024));
}
if (estimatedSize > maxPayloadSizeBytes) {
long sizeLimit = maxPayloadSizeBytes - BULK_OPERATION_METADATA_OVERHEAD;
json = SearchIndexUtils.stripLineageForSize(json, sizeLimit, docId, entityType);
rawDocSize = json.getBytes(StandardCharsets.UTF_8).length;
estimatedSize = rawDocSize + BULK_OPERATION_METADATA_OVERHEAD;
}
if (estimatedSize > maxPayloadSizeBytes) {
LOG.warn(
"Document {} of type {} is too large for bulk ({} bytes), sending directly",
docId,
entityType,
rawDocSize);
totalSubmitted.incrementAndGet();
if (tracker != null) {
tracker.incrementPendingSink();
}
indexDocumentDirectly(indexName, docId, json, entityType, tracker);
processSuccess.incrementAndGet();
if (tracker != null) {
tracker.recordProcess(StatsResult.SUCCESS);
}
return;
}
final String indexableJson = json;
BulkOperation operation;
if (recreateIndex) {
operation =
BulkOperation.of(
op ->
op.index(
idx -> idx.index(indexName).id(docId).document(EsUtils.toJsonData(json))));
idx ->
idx.index(indexName)
.id(docId)
.document(EsUtils.toJsonData(indexableJson))));
} else {
operation =
BulkOperation.of(
@ -321,7 +361,10 @@ public class ElasticSearchBulkSink implements BulkSink {
upd ->
upd.index(indexName)
.id(docId)
.action(a -> a.doc(EsUtils.toJsonData(json)).docAsUpsert(true))));
.action(
a ->
a.doc(EsUtils.toJsonData(indexableJson))
.docAsUpsert(true))));
}
if (tracker != null) {
tracker.incrementPendingSink();
@ -369,6 +412,42 @@ public class ElasticSearchBulkSink implements BulkSink {
}
}
/**
 * Indexes one document with a standalone index request instead of adding it to the bulk buffer.
 *
 * <p>Success and failure are both absorbed here: counters, the optional tracker and the optional
 * failure callback are updated, and no exception is propagated to the caller.
 *
 * <p>NOTE(review): this always issues a full {@code index} request; the bulk path uses an
 * update with {@code docAsUpsert(true)} when the index is not being recreated - confirm the
 * replace semantics are intended for that case.
 *
 * @param indexName target index name
 * @param docId id of the document to write
 * @param json serialized document body
 * @param entityType entity type, used for logging and failure reporting
 * @param tracker optional stage tracker; skipped when {@code null}
 */
private void indexDocumentDirectly(
String indexName, String docId, String json, String entityType, StageStatsTracker tracker) {
try {
searchClient
.getNewClient()
.index(idx -> idx.index(indexName).id(docId).document(EsUtils.toJsonData(json)));
// The request returned without throwing: count it as a sink success.
totalSuccess.incrementAndGet();
updateStats();
if (tracker != null) {
tracker.recordSink(StatsResult.SUCCESS);
}
} catch (Exception e) {
// Any exception raised in the try block (request or bookkeeping) lands here and is
// recorded as a sink failure rather than rethrown.
LOG.error(
"Direct index failed for document {} of type {}: {}",
docId,
entityType,
e.getMessage(),
e);
totalFailed.incrementAndGet();
updateStats();
if (tracker != null) {
tracker.recordSink(StatsResult.FAILED);
}
if (failureCallback != null) {
// Third argument is passed as null (presumably the entity FQN, which is not available
// here - verify against the callback signature).
failureCallback.onFailure(
entityType,
docId,
null,
String.format(
"Document too large for bulk (%d bytes); direct index failed: %s",
json.getBytes(StandardCharsets.UTF_8).length, e.getMessage()),
IndexingFailureRecorder.FailureStage.SINK);
}
}
}
private void addTimeSeriesEntity(
EntityTimeSeriesInterface entity,
String indexName,
@ -755,6 +834,8 @@ public class ElasticSearchBulkSink implements BulkSink {
throw new IllegalStateException("Bulk processor is closed");
}
totalSubmitted.incrementAndGet();
if (docId != null) {
if (entityType != null) {
docIdToEntityType.put(docId, entityType);
@ -766,6 +847,10 @@ public class ElasticSearchBulkSink implements BulkSink {
long operationSize =
estimatedSizeBytes > 0 ? estimatedSizeBytes : estimateOperationSize(operation);
if (!buffer.isEmpty() && currentBufferSize + operationSize >= maxPayloadSizeBytes) {
flushInternal();
}
buffer.add(operation);
currentBufferSize += operationSize;
@ -852,8 +937,6 @@ public class ElasticSearchBulkSink implements BulkSink {
long executionId = executionIdCounter.incrementAndGet();
int numberOfActions = toFlush.size();
totalSubmitted.addAndGet(numberOfActions);
LOG.debug("Executing bulk request {} with {} actions", executionId, numberOfActions);
try {

View file

@ -42,6 +42,7 @@ import org.openmetadata.service.apps.bundles.searchIndex.stats.StatsResult;
import org.openmetadata.service.exception.EntityNotFoundException;
import org.openmetadata.service.exception.SearchIndexException;
import org.openmetadata.service.search.ReindexContext;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.SearchRepository;
import org.openmetadata.service.search.indexes.ColumnSearchIndex;
import org.openmetadata.service.search.opensearch.OpenSearchClient;
@ -107,6 +108,7 @@ public class OpenSearchBulkSink implements BulkSink {
private final OpenSearchClient searchClient;
protected final SearchRepository searchRepository;
private final long maxPayloadSizeBytes;
private final CustomBulkProcessor bulkProcessor;
private final StepStats stats = new StepStats();
@ -152,6 +154,7 @@ public class OpenSearchBulkSink implements BulkSink {
this.searchClient = (OpenSearchClient) searchRepository.getSearchClient();
this.batchSize = batchSize;
this.maxConcurrentRequests = maxConcurrentRequests;
this.maxPayloadSizeBytes = maxPayloadSizeBytes;
// Initialize stats
stats.withTotalRecords(0).withSuccessRecords(0).withFailedRecords(0);
@ -333,7 +336,7 @@ public class OpenSearchBulkSink implements BulkSink {
return null;
}
private static final int BULK_OPERATION_METADATA_OVERHEAD = 50;
private static final int BULK_OPERATION_METADATA_OVERHEAD = 150;
private void addEntity(
EntityInterface entity,
@ -354,10 +357,43 @@ public class OpenSearchBulkSink implements BulkSink {
String finalJson = json;
String docId = entity.getId().toString();
long estimatedSize =
(long) finalJson.getBytes(StandardCharsets.UTF_8).length
+ BULK_OPERATION_METADATA_OVERHEAD;
long rawDocSize = (long) finalJson.getBytes(StandardCharsets.UTF_8).length;
long estimatedSize = rawDocSize + BULK_OPERATION_METADATA_OVERHEAD;
if (rawDocSize > 1024 * 1024) {
LOG.warn(
"Large indexed doc: entityType={}, docId={}, size={}MB",
entityType,
docId,
rawDocSize / (1024 * 1024));
}
if (estimatedSize > maxPayloadSizeBytes) {
long sizeLimit = maxPayloadSizeBytes - BULK_OPERATION_METADATA_OVERHEAD;
finalJson = SearchIndexUtils.stripLineageForSize(finalJson, sizeLimit, docId, entityType);
rawDocSize = finalJson.getBytes(StandardCharsets.UTF_8).length;
estimatedSize = rawDocSize + BULK_OPERATION_METADATA_OVERHEAD;
}
if (estimatedSize > maxPayloadSizeBytes) {
LOG.warn(
"Document {} of type {} is too large for bulk ({} bytes), sending directly",
docId,
entityType,
rawDocSize);
totalSubmitted.incrementAndGet();
if (tracker != null) {
tracker.incrementPendingSink();
}
indexDocumentDirectly(indexName, docId, finalJson, entityType, tracker);
processSuccess.incrementAndGet();
if (tracker != null) {
tracker.recordProcess(StatsResult.SUCCESS);
}
return;
}
final String indexableJson = finalJson;
BulkOperation operation;
if (recreateIndex) {
operation =
@ -367,7 +403,7 @@ public class OpenSearchBulkSink implements BulkSink {
idx ->
idx.index(indexName)
.id(docId)
.document(OsUtils.toJsonData(finalJson))));
.document(OsUtils.toJsonData(indexableJson))));
} else {
operation =
BulkOperation.of(
@ -376,7 +412,7 @@ public class OpenSearchBulkSink implements BulkSink {
upd ->
upd.index(indexName)
.id(docId)
.document(OsUtils.toJsonData(finalJson))
.document(OsUtils.toJsonData(indexableJson))
.docAsUpsert(true)));
}
if (tracker != null) {
@ -425,6 +461,42 @@ public class OpenSearchBulkSink implements BulkSink {
}
}
/**
 * Indexes one document with a standalone index request instead of adding it to the bulk buffer.
 *
 * <p>Success and failure are both absorbed here: counters, the optional tracker and the optional
 * failure callback are updated, and no exception is propagated to the caller.
 *
 * <p>NOTE(review): this always issues a full {@code index} request; the bulk path uses an
 * update with {@code docAsUpsert(true)} when the index is not being recreated - confirm the
 * replace semantics are intended for that case.
 *
 * @param indexName target index name
 * @param docId id of the document to write
 * @param json serialized document body
 * @param entityType entity type, used for logging and failure reporting
 * @param tracker optional stage tracker; skipped when {@code null}
 */
private void indexDocumentDirectly(
String indexName, String docId, String json, String entityType, StageStatsTracker tracker) {
try {
searchClient
.getNewClient()
.index(idx -> idx.index(indexName).id(docId).document(OsUtils.toJsonData(json)));
// The request returned without throwing: count it as a sink success.
totalSuccess.incrementAndGet();
updateStats();
if (tracker != null) {
tracker.recordSink(StatsResult.SUCCESS);
}
} catch (Exception e) {
// Any exception raised in the try block (request or bookkeeping) lands here and is
// recorded as a sink failure rather than rethrown.
LOG.error(
"Direct index failed for document {} of type {}: {}",
docId,
entityType,
e.getMessage(),
e);
totalFailed.incrementAndGet();
updateStats();
if (tracker != null) {
tracker.recordSink(StatsResult.FAILED);
}
if (failureCallback != null) {
// Third argument is passed as null (presumably the entity FQN, which is not available
// here - verify against the callback signature).
failureCallback.onFailure(
entityType,
docId,
null,
String.format(
"Document too large for bulk (%d bytes); direct index failed: %s",
json.getBytes(StandardCharsets.UTF_8).length, e.getMessage()),
IndexingFailureRecorder.FailureStage.SINK);
}
}
}
private void addTimeSeriesEntity(
EntityTimeSeriesInterface entity,
String indexName,
@ -899,6 +971,8 @@ public class OpenSearchBulkSink implements BulkSink {
throw new IllegalStateException("Bulk processor is closed");
}
totalSubmitted.incrementAndGet();
if (docId != null) {
if (entityType != null) {
docIdToEntityType.put(docId, entityType);
@ -910,6 +984,10 @@ public class OpenSearchBulkSink implements BulkSink {
long operationSize =
estimatedSizeBytes > 0 ? estimatedSizeBytes : estimateOperationSize(operation);
if (!buffer.isEmpty() && currentBufferSize + operationSize >= maxPayloadSizeBytes) {
flushInternal();
}
buffer.add(operation);
currentBufferSize += operationSize;
@ -1001,8 +1079,6 @@ public class OpenSearchBulkSink implements BulkSink {
long executionId = executionIdCounter.incrementAndGet();
int numberOfActions = toFlush.size();
totalSubmitted.addAndGet(numberOfActions);
LOG.debug("Executing bulk request {} with {} actions", executionId, numberOfActions);
try {

View file

@ -46,7 +46,8 @@ public record ReindexingConfiguration(
private static final int DEFAULT_PRODUCER_THREADS = 1;
private static final int DEFAULT_QUEUE_SIZE = 100;
private static final int DEFAULT_MAX_CONCURRENT_REQUESTS = 100;
private static final long DEFAULT_PAYLOAD_SIZE = 104857600L;
private static final long DEFAULT_PAYLOAD_SIZE =
SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES;
private static final int DEFAULT_FIELD_FETCH_THREADS = 0;
private static final int DEFAULT_DOC_BUILD_THREADS = 0;
private static final long DEFAULT_STATS_INTERVAL_MS = 0;

View file

@ -28,6 +28,7 @@ import org.openmetadata.service.apps.bundles.searchIndex.IndexingFailureRecorder
import org.openmetadata.service.cache.CacheConfig;
import org.openmetadata.service.jdbi3.AppRepository;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.search.SearchClusterMetrics;
import org.openmetadata.service.search.SearchRepository;
/**
@ -333,7 +334,7 @@ public class DistributedJobParticipant implements Managed {
: 100,
job.getJobConfiguration().getPayLoadSize() != null
? job.getJobConfiguration().getPayLoadSize()
: 104857600L);
: SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES);
int batchSize =
job.getJobConfiguration().getBatchSize() != null

View file

@ -237,23 +237,95 @@ public interface SearchClient
""";
String REMOVE_LINEAGE_SCRIPT =
"ctx._source.upstreamLineage.removeIf(lineage -> lineage.docUniqueId == params.docUniqueId)";
"""
def removedKeys = new HashSet();
for (def lineage : ctx._source.upstreamLineage) {
if (params.docUniqueId.equals(lineage.docUniqueId) && lineage.containsKey('sqlQueryKey')) {
removedKeys.add(lineage.sqlQueryKey);
}
}
ctx._source.upstreamLineage.removeIf(lineage -> params.docUniqueId.equals(lineage.docUniqueId));
if (!removedKeys.isEmpty() && ctx._source.containsKey('lineageSqlQueries') && ctx._source.lineageSqlQueries != null) {
def sqlMap = ctx._source.lineageSqlQueries;
def usedKeys = new HashSet();
for (def lineage : ctx._source.upstreamLineage) {
if (lineage.containsKey('sqlQueryKey')) {
usedKeys.add(lineage.sqlQueryKey);
}
}
removedKeys.removeAll(usedKeys);
for (def key : removedKeys) {
sqlMap.remove(key);
}
}
""";
String REMOVE_ENTITY_RELATIONSHIP =
"ctx._source.upstreamEntityRelationship.removeIf(relationship -> relationship.docId == params.docId)";
String ADD_UPDATE_LINEAGE =
"""
// Dedup sqlQuery into the doc-level lineageSqlQueries map.
// If the incoming edge carries a sqlQuery, store it once in lineageSqlQueries
// keyed by a sequential integer, then replace sqlQuery with sqlQueryKey on the edge.
def rawSql = params.lineageData['sqlQuery'];
Map edgeData;
if (rawSql != null && !rawSql.isEmpty()) {
if (!ctx._source.containsKey('lineageSqlQueries') || ctx._source['lineageSqlQueries'] == null) {
ctx._source['lineageSqlQueries'] = new HashMap();
}
def sqlMap = ctx._source['lineageSqlQueries'];
def sqlKey = null;
for (def entry : sqlMap.entrySet()) {
if (entry.getValue().equals(rawSql)) {
sqlKey = entry.getKey();
break;
}
}
if (sqlKey == null) {
def maxKey = 0;
for (def k : sqlMap.keySet()) {
def kInt = Integer.parseInt(k);
if (kInt > maxKey) maxKey = kInt;
}
sqlKey = String.valueOf(maxKey + 1);
sqlMap.put(sqlKey, rawSql);
}
edgeData = new HashMap();
edgeData.putAll(params.lineageData);
edgeData.put('sqlQueryKey', sqlKey);
edgeData.remove('sqlQuery');
} else {
edgeData = params.lineageData;
}
// Replace or add the edge, capturing the old sqlQueryKey for cleanup.
def oldSqlQueryKey = null;
boolean docIdExists = false;
for (int i = 0; i < ctx._source.upstreamLineage.size(); i++) {
if (ctx._source.upstreamLineage[i].docUniqueId.equalsIgnoreCase(params.lineageData.docUniqueId)) {
ctx._source.upstreamLineage[i] = params.lineageData;
if (ctx._source.upstreamLineage[i].containsKey('sqlQueryKey')) {
oldSqlQueryKey = ctx._source.upstreamLineage[i].sqlQueryKey;
}
ctx._source.upstreamLineage[i] = edgeData;
docIdExists = true;
break;
}
}
if (!docIdExists) {
ctx._source.upstreamLineage.add(params.lineageData);
ctx._source.upstreamLineage.add(edgeData);
}
// Prune the old SQL key if it changed and is no longer used by any edge.
if (oldSqlQueryKey != null && !oldSqlQueryKey.equals(edgeData.containsKey('sqlQueryKey') ? edgeData.get('sqlQueryKey') : null)) {
boolean stillUsed = false;
for (def lineage : ctx._source.upstreamLineage) {
if (lineage.containsKey('sqlQueryKey') && oldSqlQueryKey.equals(lineage.sqlQueryKey)) {
stillUsed = true;
break;
}
}
if (!stillUsed && ctx._source.containsKey('lineageSqlQueries') && ctx._source.lineageSqlQueries != null) {
ctx._source.lineageSqlQueries.remove(oldSqlQueryKey);
}
}
""";

View file

@ -34,7 +34,9 @@ public class SearchClusterMetrics {
public static final long DEFAULT_HEAP_USED_BYTES = 512L * 1024 * 1024; // 512 MB
public static final long DEFAULT_HEAP_MAX_BYTES = 1024L * 1024 * 1024; // 1 GB
public static final long DEFAULT_MAX_CONTENT_LENGTH =
10 * 1024 * 1024L; // Conservative 10MB default
10 * 1024 * 1024L; // Conservative 10MB default (AWS OpenSearch hard limit)
// Safe bulk payload threshold: 90% of max_content_length to leave headroom for HTTP framing
public static final long DEFAULT_BULK_PAYLOAD_SIZE_BYTES = DEFAULT_MAX_CONTENT_LENGTH * 9 / 10;
public static SearchClusterMetrics fetchClusterMetrics(
SearchRepository searchRepository, long totalEntities, int maxDbConnections) {
@ -445,9 +447,9 @@ public class SearchClusterMetrics {
long usedHeap = totalHeap - freeHeap;
double heapUsagePercent = (maxHeap > 0) ? (double) usedHeap / maxHeap * 100 : 50.0;
// Default to conservative 10MB for AWS-managed clusters if we can't fetch from cluster
long maxContentLength = DEFAULT_MAX_CONTENT_LENGTH; // Conservative 10MB default
long maxPayloadSize = DEFAULT_MAX_CONTENT_LENGTH; // Conservative 10MB default
// AWS-managed clusters expose max_content_length=10MB; use 90% as bulk threshold for headroom
long maxContentLength = DEFAULT_MAX_CONTENT_LENGTH;
long maxPayloadSize = DEFAULT_BULK_PAYLOAD_SIZE_BYTES;
try {
if (searchRepository != null) {
SearchClient searchClient = searchRepository.getSearchClient();

View file

@ -481,7 +481,9 @@ public class SearchIndexRetryWorker implements Managed {
Set<String> failedEntityIds = ConcurrentHashMap.newKeySet();
AtomicReference<String> firstFailureDetail = new AtomicReference<>();
BulkSink bulkSink = searchRepository.createBulkSink(200, 5, 10L * 1024L * 1024L);
BulkSink bulkSink =
searchRepository.createBulkSink(
200, 5, SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES);
bulkSink.setFailureCallback(
(entityType, entityId, entityFqn, errorMessage, stage) -> {
if (entityId != null && !entityId.isEmpty()) {

View file

@ -3,15 +3,18 @@ package org.openmetadata.service.search;
import static org.openmetadata.service.search.SearchUtils.getAggregationBuckets;
import static org.openmetadata.service.search.SearchUtils.getAggregationObject;
import com.fasterxml.jackson.core.type.TypeReference;
import jakarta.json.JsonArray;
import jakarta.json.JsonNumber;
import jakarta.json.JsonObject;
import jakarta.json.JsonString;
import jakarta.json.JsonValue;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@ -21,6 +24,7 @@ import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.openmetadata.schema.ColumnsEntityInterface;
import org.openmetadata.schema.EntityInterface;
import org.openmetadata.schema.api.lineage.EsLineageData;
import org.openmetadata.schema.tests.DataQualityReport;
import org.openmetadata.schema.tests.Datum;
import org.openmetadata.schema.tests.type.DataQualityReportMetadata;
@ -44,6 +48,117 @@ public final class SearchIndexUtils {
private SearchIndexUtils() {}
/**
 * Collapses repeated SQL text on lineage edges into a shared lookup table, mutating the edges.
 *
 * <p>The first time a non-empty SQL text is seen it receives the next sequential key ("1", "2",
 * "3", ...). Every edge carrying that text gets {@code sqlQueryKey} set to the shared key and
 * its {@code sqlQuery} set to {@code null}. Edges whose SQL is {@code null} or empty are not
 * modified.
 *
 * @param edges lineage edges to rewrite in place
 * @return map of {@code key -> sqlText}, in order of first appearance; empty when no edge
 *     carried SQL
 */
public static Map<String, String> deduplicateSqlAcrossEdges(List<EsLineageData> edges) {
  Map<String, String> keyBySqlText = new LinkedHashMap<>();
  Map<String, String> sqlTextByKey = new LinkedHashMap<>();
  for (EsLineageData edge : edges) {
    String sqlText = edge.getSqlQuery();
    if (sqlText == null || sqlText.isEmpty()) {
      continue;
    }
    String sharedKey = keyBySqlText.get(sqlText);
    if (sharedKey == null) {
      // Keys are handed out sequentially, so the next one is always "entries so far + 1".
      sharedKey = String.valueOf(sqlTextByKey.size() + 1);
      keyBySqlText.put(sqlText, sharedKey);
      sqlTextByKey.put(sharedKey, sqlText);
    }
    edge.setSqlQueryKey(sharedKey);
    edge.setSqlQuery(null);
  }
  return sqlTextByKey;
}
/**
 * Progressively strips lineage fields from a search document JSON to bring it under maxBytes.
 *
 * <p>Stripping order: {@code lineageSqlQueries} first (the edges keep their topology but lose
 * their {@code sqlQueryKey} references), then {@code upstreamLineage}. A field is only reported
 * as stripped, and the document only re-serialized, when that field was actually present.
 *
 * <p>Returns the (possibly stripped) JSON - the caller must re-check the size and handle the
 * still-oversized case.
 *
 * @param json serialized search document
 * @param maxBytes maximum allowed UTF-8 size of the document, in bytes
 * @param docId document id, used for logging only
 * @param entityType entity type, used for logging only
 * @return the input unchanged when it already fits or holds no lineage fields; otherwise the
 *     re-serialized document with lineage fields removed
 */
public static String stripLineageForSize(
    String json, long maxBytes, String docId, String entityType) {
  if (json.getBytes(StandardCharsets.UTF_8).length <= maxBytes) {
    return json;
  }
  TypeReference<Map<String, Object>> mapType = new TypeReference<>() {};
  Map<String, Object> doc = JsonUtils.readValue(json, mapType);
  if (doc.remove("lineageSqlQueries") != null) {
    // The lookup table is gone, so the per-edge keys pointing into it are dropped as well.
    stripSqlQueryKeysFromEdges(doc);
    json = JsonUtils.pojoToJson(doc);
    int sizeAfterStrip = json.getBytes(StandardCharsets.UTF_8).length;
    LOG.warn(
        "Document {} ({}) too large, stripped lineageSqlQueries (size now {} bytes)",
        docId,
        entityType,
        sizeAfterStrip);
    if (sizeAfterStrip <= maxBytes) {
      return json;
    }
  }
  // Only re-serialize and log when upstreamLineage was really there; otherwise the warning
  // would claim a strip that never happened.
  if (doc.remove("upstreamLineage") != null) {
    json = JsonUtils.pojoToJson(doc);
    LOG.warn(
        "Document {} ({}) still too large, stripped upstreamLineage (size now {} bytes)",
        docId,
        entityType,
        json.getBytes(StandardCharsets.UTF_8).length);
  }
  return json;
}
/**
 * Map-based counterpart of the JSON lineage stripping: removes lineage fields from {@code doc}
 * in place when its serialized UTF-8 size exceeds {@code maxBytes}.
 *
 * <p>{@code lineageSqlQueries} is removed first (together with the per-edge {@code sqlQueryKey}
 * entries); {@code upstreamLineage} is removed only if the document is still too large or had no
 * {@code lineageSqlQueries}. The document may still exceed {@code maxBytes} on return.
 *
 * @param doc search document map; mutated when stripping is needed
 * @param maxBytes maximum allowed serialized size, in bytes
 * @param docId document id, used for logging only
 * @param entityType entity type, used for logging only
 * @return the same {@code doc} instance that was passed in
 */
public static Map<String, Object> stripDocMapIfOversized(
    Map<String, Object> doc, long maxBytes, String docId, String entityType) {
  int initialSize = JsonUtils.pojoToJson(doc).getBytes(StandardCharsets.UTF_8).length;
  if (initialSize > maxBytes) {
    boolean fitsAfterSqlStrip = false;
    Object removedSqlQueries = doc.remove("lineageSqlQueries");
    if (removedSqlQueries != null) {
      stripSqlQueryKeysFromEdges(doc);
      int sizeWithoutSql = JsonUtils.pojoToJson(doc).getBytes(StandardCharsets.UTF_8).length;
      LOG.warn(
          "Live index doc {} ({}) too large, stripped lineageSqlQueries ({} bytes)",
          docId,
          entityType,
          sizeWithoutSql);
      fitsAfterSqlStrip = sizeWithoutSql <= maxBytes;
    }
    if (!fitsAfterSqlStrip && doc.remove("upstreamLineage") != null) {
      int sizeWithoutLineage =
          JsonUtils.pojoToJson(doc).getBytes(StandardCharsets.UTF_8).length;
      LOG.warn(
          "Live index doc {} ({}) still too large, stripped upstreamLineage ({} bytes)",
          docId,
          entityType,
          sizeWithoutLineage);
    }
  }
  return doc;
}
/**
 * Removes the {@code sqlQueryKey} entry from every map-shaped element of the document's
 * {@code upstreamLineage} list. Does nothing when that field is absent or not a list; list
 * elements that are not maps are skipped.
 */
private static void stripSqlQueryKeysFromEdges(Map<String, Object> doc) {
  if (!(doc.get("upstreamLineage") instanceof List<?> edgeList)) {
    return;
  }
  edgeList.forEach(
      candidate -> {
        if (candidate instanceof Map<?, ?> edgeFields) {
          // Map.remove takes an Object key, so no unchecked cast is required.
          edgeFields.remove("sqlQueryKey");
        }
      });
}
public static List<String> parseFollowers(List<EntityReference> followersRef) {
if (followersRef == null) {
return Collections.emptyList();

View file

@ -1081,10 +1081,11 @@ public class SearchRepository {
}
SearchIndex elasticSearchIndex = searchIndexFactory.buildIndex(entityType, entity);
doc = elasticSearchIndex.buildSearchIndexDoc();
doc =
SearchIndexUtils.stripDocMapIfOversized(
doc, SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES, entityId, entityType);
}
// Use synchronous update to ensure tests pass
// TODO: Consider using async updates with proper wait mechanisms in tests
searchClient.updateEntity(indexMapping.getIndexName(clusterAlias), entityId, doc, scriptTxt);
if (Entity.TABLE.equals(entityType)) {
@ -1242,7 +1243,7 @@ public class SearchRepository {
int batchSize = 100;
int maxConcurrentRequests = 5;
long maxPayloadSizeBytes = 10 * 1024 * 1024; // 10MB
long maxPayloadSizeBytes = SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES;
// Process each entity type separately to ensure correct index routing
for (Map.Entry<String, List<EntityInterface>> entry : entitiesByType.entrySet()) {

View file

@ -1,6 +1,7 @@
package org.openmetadata.service.search.indexes;
import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.data.APICollection;
import org.openmetadata.service.Entity;
@ -16,6 +17,11 @@ public record APICollectionIndex(APICollection apiCollection) implements Taggabl
return Entity.API_COLLECTION;
}
/**
 * Returns the field names this index reports as excluded: only {@code apiEndpoints}.
 *
 * <p>NOTE(review): presumably these fields are left out of the generated search document to
 * keep it small - confirm against the interface contract.
 */
@Override
public Set<String> getExcludedFields() {
return Set.of("apiEndpoints");
}
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
return doc;
}

View file

@ -14,6 +14,7 @@ import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.FlattenColumn;
public record ContainerIndex(Container container) implements ColumnIndex, DataAssetIndex {
@Override
public Object getEntity() {
return container;
@ -29,6 +30,11 @@ public record ContainerIndex(Container container) implements ColumnIndex, DataAs
return container.getServiceType();
}
/**
 * Returns the field names this index reports as excluded: only {@code children}.
 *
 * <p>NOTE(review): presumably these fields are left out of the generated search document to
 * keep it small - confirm against the interface contract.
 */
@Override
public Set<String> getExcludedFields() {
return Set.of("children");
}
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
if (container.getDataModel() != null && container.getDataModel().getColumns() != null) {
List<FlattenColumn> cols = new ArrayList<>();

View file

@ -1,6 +1,7 @@
package org.openmetadata.service.search.indexes;
import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.data.Dashboard;
import org.openmetadata.service.Entity;
@ -26,6 +27,11 @@ public class DashboardIndex implements DataAssetIndex {
return dashboard.getServiceType();
}
/**
 * Returns the field names this index reports as excluded: only {@code dataModels}.
 *
 * <p>NOTE(review): presumably these fields are left out of the generated search document to
 * keep it small - confirm against the interface contract.
 */
@Override
public Set<String> getExcludedFields() {
return Set.of("dataModels");
}
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
return doc;
}

View file

@ -1,10 +1,12 @@
package org.openmetadata.service.search.indexes;
import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.data.Database;
import org.openmetadata.service.Entity;
public record DatabaseIndex(Database database) implements TaggableIndex {
@Override
public Object getEntity() {
return database;
@ -15,6 +17,11 @@ public record DatabaseIndex(Database database) implements TaggableIndex {
return Entity.DATABASE;
}
/**
 * Returns the field names this index reports as excluded: only {@code databaseSchemas}.
 *
 * <p>NOTE(review): presumably these fields are left out of the generated search document to
 * keep it small - confirm against the interface contract.
 */
@Override
public Set<String> getExcludedFields() {
return Set.of("databaseSchemas");
}
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
return doc;
}

View file

@ -1,6 +1,7 @@
package org.openmetadata.service.search.indexes;
import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.data.DatabaseSchema;
import org.openmetadata.service.Entity;
@ -16,6 +17,11 @@ public record DatabaseSchemaIndex(DatabaseSchema databaseSchema) implements Tagg
return Entity.DATABASE_SCHEMA;
}
/**
 * Returns the field names this index reports as excluded: only {@code tables}.
 *
 * <p>NOTE(review): presumably these fields are left out of the generated search document to
 * keep it small - confirm against the interface contract.
 */
@Override
public Set<String> getExcludedFields() {
return Set.of("tables");
}
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
return doc;
}

View file

@ -1,6 +1,7 @@
package org.openmetadata.service.search.indexes;
import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.data.Glossary;
import org.openmetadata.schema.entity.data.GlossaryTerm;
import org.openmetadata.schema.type.Include;
@ -23,6 +24,11 @@ public class GlossaryTermIndex implements TaggableIndex {
return Entity.GLOSSARY_TERM;
}
/**
 * Returns the field names this index reports as excluded: only {@code children}.
 *
 * <p>NOTE(review): presumably these fields are left out of the generated search document to
 * keep it small - confirm against the interface contract.
 */
@Override
public Set<String> getExcludedFields() {
return Set.of("children");
}
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
if (doc.containsKey("glossary") && glossaryTerm.getGlossary() != null) {
@SuppressWarnings("unchecked")

View file

@ -1,6 +1,7 @@
package org.openmetadata.service.search.indexes;
import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.services.LLMService;
import org.openmetadata.service.Entity;
@ -16,6 +17,11 @@ public record LlmServiceIndex(LLMService llmService) implements TaggableIndex, L
return Entity.LLM_SERVICE;
}
/**
 * Returns the field names this index reports as excluded: only {@code models}.
 *
 * <p>NOTE(review): presumably these fields are left out of the generated search document to
 * keep it small - confirm against the interface contract.
 */
@Override
public Set<String> getExcludedFields() {
return Set.of("models");
}
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
return doc;
}

View file

@ -254,6 +254,23 @@ public interface SearchIndex {
return data;
}
/**
 * Writes the entity's lineage into the given search doc map.
 *
 * <p>The edges returned by {@code getLineageData} are passed through SQL deduplication and then
 * stored under {@code upstreamLineage}. The resulting {@code key -> sqlText} table is stored
 * under {@code lineageSqlQueries} only when at least one edge carried SQL; each such edge then
 * references its text through {@code sqlQueryKey}. This deduplication is local to the search
 * document.
 *
 * @param doc search document map to populate
 * @param entity reference to the entity whose lineage is looked up
 */
static void populateLineageData(Map<String, Object> doc, EntityReference entity) {
  List<EsLineageData> upstreamEdges = getLineageData(entity);
  Map<String, String> sharedSql = SearchIndexUtils.deduplicateSqlAcrossEdges(upstreamEdges);
  doc.put("upstreamLineage", upstreamEdges);
  if (sharedSql.isEmpty()) {
    return;
  }
  doc.put("lineageSqlQueries", sharedSql);
}
static List<Map<String, Object>> populateUpstreamEntityRelationshipData(Table entity) {
List<Map<String, Object>> upstreamRelationships = new ArrayList<>();

View file

@ -7,7 +7,7 @@ import org.openmetadata.service.Entity;
public class TeamIndex implements SearchIndex {
final Team team;
final Set<String> excludeFields = Set.of("owns");
final Set<String> excludeFields = Set.of("owns", "users", "defaultRoles", "inheritedRoles");
public TeamIndex(Team team) {
this.team = team;

View file

@ -64,6 +64,7 @@ import org.openmetadata.service.search.DefaultRecreateHandler;
import org.openmetadata.service.search.EntityReindexContext;
import org.openmetadata.service.search.RecreateIndexHandler;
import org.openmetadata.service.search.ReindexContext;
import org.openmetadata.service.search.SearchClusterMetrics;
import org.openmetadata.service.search.SearchRepository;
import org.openmetadata.service.util.FullyQualifiedName;
import org.openmetadata.service.util.RestUtil;
@ -893,7 +894,9 @@ class SearchIndexExecutorControlFlowTest {
when(collectionDAO.searchIndexFailureDAO()).thenReturn(failureDao);
when(entityRepository.getDao()).thenReturn(entityDao);
when(entityDao.listCount(any(ListFilter.class))).thenReturn(0);
when(searchRepository.createBulkSink(100, 100, 104857600L)).thenReturn(sink);
when(searchRepository.createBulkSink(
100, 100, SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES))
.thenReturn(sink);
when(searchRepository.createReindexHandler()).thenReturn(handler);
when(handler.reCreateIndexes(Set.of(Entity.TABLE))).thenReturn(recreateContext);
executor.addListener(listener);
@ -931,7 +934,8 @@ class SearchIndexExecutorControlFlowTest {
when(jobContext.getJobId()).thenReturn(UUID.randomUUID());
when(entityRepository.getDao()).thenReturn(entityDao);
when(entityDao.listCount(any(ListFilter.class))).thenReturn(0);
when(searchRepository.createBulkSink(100, 100, 104857600L))
when(searchRepository.createBulkSink(
100, 100, SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES))
.thenThrow(new IllegalStateException("sink init failed"));
executor.addListener(listener);

View file

@ -67,6 +67,7 @@ import org.openmetadata.service.apps.bundles.searchIndex.IndexingFailureRecorder
import org.openmetadata.service.cache.CacheConfig;
import org.openmetadata.service.jdbi3.AppRepository;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.search.SearchClusterMetrics;
import org.openmetadata.service.search.SearchRepository;
@ExtendWith(MockitoExtension.class)
@ -1028,7 +1029,9 @@ class DistributedJobParticipantTest {
CollectionDAO.SearchIndexFailureDAO failureDao =
mock(CollectionDAO.SearchIndexFailureDAO.class);
when(collectionDAO.searchIndexFailureDAO()).thenReturn(failureDao);
when(searchRepository.createBulkSink(100, 100, 104857600L)).thenReturn(bulkSink);
when(searchRepository.createBulkSink(
100, 100, SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES))
.thenReturn(bulkSink);
when(bulkSink.flushAndAwait(60)).thenReturn(false);
try (MockedConstruction<DistributedSearchIndexCoordinator> coordinatorMocked =
@ -1052,7 +1055,8 @@ class DistributedJobParticipantTest {
invokeParticipantMethod(
"processJobPartitions", new Class<?>[] {SearchIndexJob.class}, runningJob);
verify(searchRepository).createBulkSink(100, 100, 104857600L);
verify(searchRepository)
.createBulkSink(100, 100, SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES);
verify(bulkSink).flushAndAwait(60);
assertTrue(Thread.currentThread().isInterrupted());
verify(coordinatorMocked.constructed().get(0)).claimNextPartition(jobId);

View file

@ -0,0 +1,250 @@
package org.openmetadata.service.search.indexes;
import static org.junit.jupiter.api.Assertions.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.openmetadata.service.search.SearchClient;
/**
* Tests the SQL deduplication logic embedded in {@link SearchClient#ADD_UPDATE_LINEAGE}.
*
* <p>The Painless script runs server-side and cannot be executed in a unit test, so these tests
* implement the equivalent logic in Java. Any change to the script must be mirrored here (and
* vice versa) so regressions are caught before deployment.
*
* <p>The script must:
* <ol>
* <li>Detect a non-empty {@code sqlQuery} on the incoming edge.
* <li>Store the SQL text once in the doc-level {@code lineageSqlQueries} map, keyed by a
* sequential integer.
* <li>Replace {@code sqlQuery} on the edge with a {@code sqlQueryKey} reference.
* <li>If the same SQL already exists in the map, reuse the existing key.
* <li>Add the edge to {@code upstreamLineage} or update the existing entry by {@code docUniqueId}.
* </ol>
*/
class AddUpdateLineageScriptTest {
/**
* Java implementation of ADD_UPDATE_LINEAGE; it is meant to mirror the Painless script exactly.
* Update this whenever the script in SearchClient.java is changed.
*
* <p>Steps, in order:
* <ol>
* <li>If the incoming edge has a non-null, non-empty {@code sqlQuery}, the text is looked up in
* the doc-level {@code lineageSqlQueries} map (created on demand). An equal value's key is
* reused; otherwise a new key {@code max(existing keys) + 1} is allocated. The stored edge
* copy carries {@code sqlQueryKey} and no {@code sqlQuery}.
* <li>Otherwise the edge is copied unchanged (an empty-string {@code sqlQuery} stays on the
* copy and no map is created).
* <li>The copy replaces the {@code upstreamLineage} entry whose {@code docUniqueId} matches
* case-insensitively, or is appended when no entry matches.
* <li>If the replaced entry referenced a different SQL key that no remaining edge uses, that
* key is removed from {@code lineageSqlQueries}.
* </ol>
*
* @param doc search document; must already contain an {@code upstreamLineage} list, since the
* value is dereferenced without a null check
* @param lineageData incoming edge; only read here, a copy of it is what gets stored
*/
@SuppressWarnings("unchecked")
private void runScript(Map<String, Object> doc, Map<String, Object> lineageData) {
String rawSql = (String) lineageData.get("sqlQuery");
Map<String, Object> edgeData;
if (rawSql != null && !rawSql.isEmpty()) {
// The SQL lookup map is created lazily, only once an edge actually carries SQL.
Map<String, String> sqlMap =
(Map<String, String>)
doc.computeIfAbsent("lineageSqlQueries", k -> new LinkedHashMap<>());
// Linear scan by value: reuse the key of an already-stored identical SQL text.
String sqlKey = null;
for (Map.Entry<String, String> entry : sqlMap.entrySet()) {
if (entry.getValue().equals(rawSql)) {
sqlKey = entry.getKey();
break;
}
}
if (sqlKey == null) {
// New text: allocate max(existing key) + 1 rather than size() + 1, so a key freed by the
// pruning step below can never collide with a key that is still in use.
// Integer.parseInt throws NumberFormatException if a key is not a decimal integer.
int maxKey = 0;
for (String k : sqlMap.keySet()) {
int kInt = Integer.parseInt(k);
if (kInt > maxKey) maxKey = kInt;
}
sqlKey = String.valueOf(maxKey + 1);
sqlMap.put(sqlKey, rawSql);
}
// Store a copy that references the SQL by key instead of embedding the text.
edgeData = new HashMap<>(lineageData);
edgeData.put("sqlQueryKey", sqlKey);
edgeData.remove("sqlQuery");
} else {
// No usable SQL: the edge is stored as a plain copy.
edgeData = new HashMap<>(lineageData);
}
List<Map<String, Object>> upstreamLineage =
(List<Map<String, Object>>) doc.get("upstreamLineage");
String oldSqlQueryKey = null;
boolean found = false;
// Upsert by docUniqueId (case-insensitive); only the first matching entry is replaced.
for (int i = 0; i < upstreamLineage.size(); i++) {
String existingId = (String) upstreamLineage.get(i).get("docUniqueId");
String incomingId = (String) lineageData.get("docUniqueId");
if (existingId != null && existingId.equalsIgnoreCase(incomingId)) {
// Remember which SQL key the replaced entry used so it can be pruned afterwards.
oldSqlQueryKey = (String) upstreamLineage.get(i).get("sqlQueryKey");
upstreamLineage.set(i, edgeData);
found = true;
break;
}
}
if (!found) {
upstreamLineage.add(edgeData);
}
// Prune old SQL key if it changed and is no longer used by any edge
String newSqlQueryKey = (String) edgeData.get("sqlQueryKey");
if (oldSqlQueryKey != null && !oldSqlQueryKey.equals(newSqlQueryKey)) {
// The scan runs after the replacement, so the replaced entry itself no longer counts as a user.
boolean stillUsed = false;
for (Map<String, Object> lineage : upstreamLineage) {
if (oldSqlQueryKey.equals(lineage.get("sqlQueryKey"))) {
stillUsed = true;
break;
}
}
// NOTE(review): this local annotation looks redundant, since the method is already annotated.
@SuppressWarnings("unchecked")
Map<String, String> sqlMap = (Map<String, String>) doc.get("lineageSqlQueries");
if (!stillUsed && sqlMap != null) {
sqlMap.remove(oldSqlQueryKey);
}
}
}
/** Builds a fresh document map whose only entry is an empty {@code upstreamLineage} list. */
private Map<String, Object> emptyDoc() {
  final Map<String, Object> skeleton = new HashMap<>();
  final List<Map<String, Object>> noEdges = new ArrayList<>();
  skeleton.put("upstreamLineage", noEdges);
  return skeleton;
}
/**
 * Builds an incoming-edge map for {@code runScript}.
 *
 * @param docUniqueId value stored under {@code "docUniqueId"} (stored even when null)
 * @param sql value stored under {@code "sqlQuery"}; the key is omitted entirely when null
 * @return a new mutable map holding the entries described above
 */
private Map<String, Object> edge(String docUniqueId, String sql) {
  final Map<String, Object> payload = new HashMap<>();
  payload.put("docUniqueId", docUniqueId);
  if (sql == null) {
    return payload;
  }
  payload.put("sqlQuery", sql);
  return payload;
}
// script constant smoke test
/**
 * Checks that the script text in {@code SearchClient.ADD_UPDATE_LINEAGE} mentions the three
 * identifiers the deduplication relies on: {@code lineageSqlQueries}, {@code sqlQueryKey} and a
 * stand-alone {@code sqlQuery}.
 */
@Test
void scriptConstantContainsDedupFields() {
  final String script = SearchClient.ADD_UPDATE_LINEAGE;
  assertTrue(script.contains("lineageSqlQueries"), "Script must reference lineageSqlQueries");
  assertTrue(script.contains("sqlQueryKey"), "Script must set sqlQueryKey on the edge");
  // "sqlQueryKey" itself contains "sqlQuery", so a plain contains("sqlQuery") could never fail
  // once the assertion above passed. Mask the longer identifier out first so this check really
  // requires a separate reference to the incoming edge's sqlQuery field.
  assertTrue(
      script.replace("sqlQueryKey", "").contains("sqlQuery"),
      "Script must read sqlQuery from the incoming edge");
}
// deduplication logic tests
/** A single SQL-bearing edge is keyed "1" and its text moves into the doc-level map. */
@Test
@SuppressWarnings("unchecked")
void firstEdgeWithSql_storedInMapAndKeySet() {
  final String sql = "SELECT * FROM src";
  final Map<String, Object> searchDoc = emptyDoc();

  runScript(searchDoc, edge("edge-1", sql));

  final List<Map<String, Object>> lineage =
      (List<Map<String, Object>>) searchDoc.get("upstreamLineage");
  final Map<String, String> sqlByKey = (Map<String, String>) searchDoc.get("lineageSqlQueries");
  assertEquals(1, lineage.size());
  final Map<String, Object> onlyEdge = lineage.get(0);
  assertEquals("1", onlyEdge.get("sqlQueryKey"));
  assertNull(onlyEdge.get("sqlQuery"), "sqlQuery must be cleared from edge");
  assertEquals(Map.of("1", sql), sqlByKey);
}
/** Two edges carrying the same SQL text share key "1" and the text is stored only once. */
@Test
@SuppressWarnings("unchecked")
void secondEdgeWithSameSql_reusesKey() {
  final String sharedSql = "SELECT * FROM src";
  final Map<String, Object> searchDoc = emptyDoc();
  for (String edgeId : List.of("edge-1", "edge-2")) {
    runScript(searchDoc, edge(edgeId, sharedSql));
  }

  final List<Map<String, Object>> lineage =
      (List<Map<String, Object>>) searchDoc.get("upstreamLineage");
  final Map<String, String> sqlByKey = (Map<String, String>) searchDoc.get("lineageSqlQueries");
  assertEquals(2, lineage.size());
  assertEquals("1", lineage.get(0).get("sqlQueryKey"));
  assertEquals("1", lineage.get(1).get("sqlQueryKey"), "same SQL must reuse the same key");
  assertEquals(1, sqlByKey.size(), "SQL stored exactly once even with 2 edges");
}
/** Three different SQL texts added in order are stored under keys "1", "2" and "3". */
@Test
@SuppressWarnings("unchecked")
void edgesWithDistinctSqls_getSequentialKeys() {
  final List<String> statements =
      List.of("SELECT a FROM t1", "SELECT b FROM t2", "SELECT c FROM t3");
  final Map<String, Object> searchDoc = emptyDoc();
  for (int i = 0; i < statements.size(); i++) {
    runScript(searchDoc, edge("edge-" + (i + 1), statements.get(i)));
  }

  final Map<String, String> sqlByKey = (Map<String, String>) searchDoc.get("lineageSqlQueries");
  assertEquals(3, sqlByKey.size());
  for (int i = 0; i < statements.size(); i++) {
    assertEquals(statements.get(i), sqlByKey.get(String.valueOf(i + 1)));
  }
}
/** An edge without SQL is stored without a key and no {@code lineageSqlQueries} map appears. */
@Test
@SuppressWarnings("unchecked")
void edgeWithNoSql_notModified_noMapEntry() {
  final Map<String, Object> searchDoc = emptyDoc();

  runScript(searchDoc, edge("edge-1", null));

  final List<Map<String, Object>> lineage =
      (List<Map<String, Object>>) searchDoc.get("upstreamLineage");
  assertEquals(1, lineage.size());
  final Object assignedKey = lineage.get(0).get("sqlQueryKey");
  assertNull(assignedKey, "edge without SQL must not get a key");
  final boolean sqlMapPresent = searchDoc.containsKey("lineageSqlQueries");
  assertFalse(sqlMapPresent, "no SQL map created when no SQL present");
}
/**
 * Re-sending an edge id with different SQL replaces the entry in place: the new text gets key
 * "2" and the now-unreferenced key "1" is pruned from the map.
 */
@Test
@SuppressWarnings("unchecked")
void updateExistingEdge_replacesInPlace() {
  final String edgeId = "edge-1";
  final String replacementSql = "SELECT new FROM t";
  final Map<String, Object> searchDoc = emptyDoc();

  runScript(searchDoc, edge(edgeId, "SELECT old FROM t"));
  runScript(searchDoc, edge(edgeId, replacementSql));

  final List<Map<String, Object>> lineage =
      (List<Map<String, Object>>) searchDoc.get("upstreamLineage");
  final Map<String, String> sqlByKey = (Map<String, String>) searchDoc.get("lineageSqlQueries");
  assertEquals(1, lineage.size(), "update must not add a second entry");
  assertEquals(1, sqlByKey.size(), "old unused SQL key is pruned");
  assertEquals("2", lineage.get(0).get("sqlQueryKey"), "updated edge points to new SQL key");
  assertEquals(replacementSql, sqlByKey.get("2"), "map contains only the new SQL");
}
/** 660 edges that all carry the same large SQL text collapse to one map entry keyed "1". */
@Test
@SuppressWarnings("unchecked")
void batchRunScenario_660EdgesSameSql_oneMapEntry() {
  final String largeSql = "CREATE OR REPLACE VIEW v AS " + "SELECT id FROM src ".repeat(200);
  final Map<String, Object> searchDoc = emptyDoc();
  int ordinal = 1;
  while (ordinal <= 660) {
    runScript(searchDoc, edge("edge-" + ordinal, largeSql));
    ordinal++;
  }

  final List<Map<String, Object>> lineage =
      (List<Map<String, Object>>) searchDoc.get("upstreamLineage");
  final Map<String, String> sqlByKey = (Map<String, String>) searchDoc.get("lineageSqlQueries");
  assertEquals(660, lineage.size());
  assertEquals(1, sqlByKey.size(), "660 identical SQLs must produce exactly 1 map entry");

  // Every stored edge must point at key "1" ...
  boolean allShareKeyOne = true;
  for (Map<String, Object> stored : lineage) {
    if (!"1".equals(stored.get("sqlQueryKey"))) {
      allShareKeyOne = false;
      break;
    }
  }
  assertTrue(allShareKeyOne);

  // ... and none may still embed the SQL text itself.
  boolean inlineSqlLeft = false;
  for (Map<String, Object> stored : lineage) {
    if (stored.get("sqlQuery") != null) {
      inlineSqlLeft = true;
      break;
    }
  }
  assertTrue(!inlineSqlLeft);
}
/** With SQL / no-SQL / SQL edges, only the SQL-bearing ones get (and share) key "1". */
@Test
@SuppressWarnings("unchecked")
void mixedEdges_onlySqlEdgesDeduplicated() {
  final String sql = "SELECT 1";
  final Map<String, Object> searchDoc = emptyDoc();
  runScript(searchDoc, edge("edge-1", sql));
  runScript(searchDoc, edge("edge-2", null));
  runScript(searchDoc, edge("edge-3", sql));

  final List<Map<String, Object>> lineage =
      (List<Map<String, Object>>) searchDoc.get("upstreamLineage");
  final Map<String, String> sqlByKey = (Map<String, String>) searchDoc.get("lineageSqlQueries");
  assertEquals(3, lineage.size());
  final Map<String, Object> firstWithSql = lineage.get(0);
  final Map<String, Object> withoutSql = lineage.get(1);
  final Map<String, Object> secondWithSql = lineage.get(2);
  assertEquals("1", firstWithSql.get("sqlQueryKey"));
  assertNull(withoutSql.get("sqlQueryKey"));
  assertEquals("1", secondWithSql.get("sqlQueryKey"));
  assertEquals(1, sqlByKey.size());
}
}

View file

@ -3,12 +3,15 @@ package org.openmetadata.service.search.indexes;
import static org.junit.jupiter.api.Assertions.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.junit.jupiter.api.Test;
import org.openmetadata.schema.api.lineage.EsLineageData;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.util.FullyQualifiedName;
@ -280,6 +283,154 @@ class SearchIndexTest {
}
}
// SQL deduplication tests
/** An empty edge list yields an empty key-to-SQL map. */
@Test
void testDeduplicateSql_noEdges_returnsEmptyMap() {
  final List<EsLineageData> noEdges = Collections.emptyList();
  final Map<String, String> sqlByKey = SearchIndexUtils.deduplicateSqlAcrossEdges(noEdges);
  assertTrue(sqlByKey.isEmpty());
}
/** Edges that carry no SQL produce no map entries and receive no {@code sqlQueryKey}. */
@Test
void testDeduplicateSql_edgesWithNoSql_untouched() {
  final EsLineageData first = new EsLineageData();
  final EsLineageData second = new EsLineageData();
  final List<EsLineageData> edges = List.of(first, second);

  final Map<String, String> sqlByKey = SearchIndexUtils.deduplicateSqlAcrossEdges(edges);

  assertTrue(sqlByKey.isEmpty(), "no SQL means no dedup map entries");
  assertNull(first.getSqlQueryKey());
  assertNull(second.getSqlQueryKey());
}
/** A lone SQL-bearing edge is keyed "1" and its inline SQL text is cleared. */
@Test
void testDeduplicateSql_singleEdgeWithSql_getsKeyOne() {
  final String sql = "SELECT 1";
  final EsLineageData onlyEdge = new EsLineageData().withSqlQuery(sql);

  final Map<String, String> sqlByKey =
      SearchIndexUtils.deduplicateSqlAcrossEdges(List.of(onlyEdge));

  assertEquals(Map.of("1", sql), sqlByKey);
  assertEquals("1", onlyEdge.getSqlQueryKey());
  assertNull(onlyEdge.getSqlQuery(), "sql text should be cleared after keying");
}
/** Three edges with identical SQL collapse to one map entry, and every edge references key "1". */
@Test
void testDeduplicateSql_identicalSqlAcrossEdges_sameKey() {
  final String sharedSql = "CREATE OR REPLACE VIEW analytics AS SELECT * FROM source";
  final EsLineageData first = new EsLineageData().withSqlQuery(sharedSql);
  final EsLineageData second = new EsLineageData().withSqlQuery(sharedSql);
  final EsLineageData third = new EsLineageData().withSqlQuery(sharedSql);
  final List<EsLineageData> edges = List.of(first, second, third);

  final Map<String, String> sqlByKey = SearchIndexUtils.deduplicateSqlAcrossEdges(edges);

  assertEquals(1, sqlByKey.size(), "identical SQL stored exactly once");
  assertEquals("1", sqlByKey.keySet().iterator().next());
  for (int i = 0; i < edges.size(); i++) {
    final EsLineageData current = edges.get(i);
    assertEquals("1", current.getSqlQueryKey(), "all edges should reference the same key");
    assertNull(current.getSqlQuery(), "sql text cleared on all edges");
  }
}
/** Three different SQL texts are keyed "1", "2", "3" in list order, on the map and the edges. */
@Test
void testDeduplicateSql_distinctSqls_getSequentialKeys() {
  final String sqlOne = "SELECT a FROM t1";
  final String sqlTwo = "SELECT b FROM t2";
  final String sqlThree = "SELECT c FROM t3";
  final EsLineageData first = new EsLineageData().withSqlQuery(sqlOne);
  final EsLineageData second = new EsLineageData().withSqlQuery(sqlTwo);
  final EsLineageData third = new EsLineageData().withSqlQuery(sqlThree);
  final List<EsLineageData> edges = List.of(first, second, third);

  final Map<String, String> sqlByKey = SearchIndexUtils.deduplicateSqlAcrossEdges(edges);

  assertEquals(3, sqlByKey.size());
  assertEquals(sqlOne, sqlByKey.get("1"));
  assertEquals(sqlTwo, sqlByKey.get("2"));
  assertEquals(sqlThree, sqlByKey.get("3"));
  assertEquals("1", first.getSqlQueryKey());
  assertEquals("2", second.getSqlQueryKey());
  assertEquals("3", third.getSqlQueryKey());
}
/** In a SQL / no-SQL / SQL list, the SQL edges share key "1" and the bare edge stays unkeyed. */
@Test
void testDeduplicateSql_mixedEdgesSomeSqlSomeNot() {
  final String sharedSql = "SELECT id FROM src";
  final EsLineageData leadingSqlEdge = new EsLineageData().withSqlQuery(sharedSql);
  final EsLineageData bareEdge = new EsLineageData();
  final EsLineageData trailingSqlEdge = new EsLineageData().withSqlQuery(sharedSql);
  final List<EsLineageData> edges = List.of(leadingSqlEdge, bareEdge, trailingSqlEdge);

  final Map<String, String> sqlByKey = SearchIndexUtils.deduplicateSqlAcrossEdges(edges);

  assertEquals(Map.of("1", sharedSql), sqlByKey);
  assertEquals("1", leadingSqlEdge.getSqlQueryKey());
  assertNull(leadingSqlEdge.getSqlQuery());
  assertNull(bareEdge.getSqlQueryKey(), "edge without SQL should not get a key");
  assertEquals("1", trailingSqlEdge.getSqlQueryKey());
}
/**
 * Models a BATCH_RUN view with 660 upstream edges that all repeat one large CREATE VIEW
 * statement (about 13.5K characters here). After deduplication the map must hold a single entry
 * and every edge must reference key "1" with its inline SQL cleared.
 */
@Test
void testDeduplicateSql_batchRunScenario_660EdgesSameSql() {
  final int edgeCount = 660;
  final String largeSql =
      "CREATE OR REPLACE VIEW batch_view AS " + "SELECT * FROM source_table ".repeat(500);
  final List<EsLineageData> edges =
      IntStream.range(0, edgeCount)
          .mapToObj(ignored -> new EsLineageData().withSqlQuery(largeSql))
          .collect(Collectors.toList());

  final Map<String, String> sqlByKey = SearchIndexUtils.deduplicateSqlAcrossEdges(edges);

  assertEquals(1, sqlByKey.size(), "660 identical SQLs deduplicated to 1 entry");
  assertEquals(largeSql, sqlByKey.get("1"));
  for (int i = 0; i < edges.size(); i++) {
    final EsLineageData current = edges.get(i);
    assertEquals("1", current.getSqlQueryKey());
    assertNull(current.getSqlQuery());
  }
}
/**
 * Interleaves three SQL texts (A on three edges, B on two, C on one) and checks that the three
 * texts get three distinct keys, that edges with equal text share a key, and that each key maps
 * back to its text.
 */
@Test
void testDeduplicateSql_partialDuplication_correctGrouping() {
  final String sqlA = "SELECT a FROM tA";
  final String sqlB = "SELECT b FROM tB";
  final String sqlC = "SELECT c FROM tC";
  // Positions: 0=A, 1=B, 2=A, 3=C, 4=A, 5=B.
  final EsLineageData firstA = new EsLineageData().withSqlQuery(sqlA);
  final EsLineageData firstB = new EsLineageData().withSqlQuery(sqlB);
  final EsLineageData secondA = new EsLineageData().withSqlQuery(sqlA);
  final EsLineageData onlyC = new EsLineageData().withSqlQuery(sqlC);
  final EsLineageData thirdA = new EsLineageData().withSqlQuery(sqlA);
  final EsLineageData secondB = new EsLineageData().withSqlQuery(sqlB);
  final List<EsLineageData> edges = List.of(firstA, firstB, secondA, onlyC, thirdA, secondB);

  final Map<String, String> sqlByKey = SearchIndexUtils.deduplicateSqlAcrossEdges(edges);

  assertEquals(3, sqlByKey.size());

  // Take each text's key from the first edge that carries it.
  final String keyA = firstA.getSqlQueryKey();
  final String keyB = firstB.getSqlQueryKey();
  final String keyC = onlyC.getSqlQueryKey();
  assertNotEquals(keyA, keyB);
  assertNotEquals(keyA, keyC);
  assertNotEquals(keyB, keyC);

  // Later edges with the same text must reuse that key.
  assertEquals(keyA, secondA.getSqlQueryKey());
  assertEquals(keyA, thirdA.getSqlQueryKey());
  assertEquals(keyB, secondB.getSqlQueryKey());

  // Each key resolves to the text it was allocated for.
  assertEquals(sqlA, sqlByKey.get(keyA));
  assertEquals(sqlB, sqlByKey.get(keyB));
  assertEquals(sqlC, sqlByKey.get(keyC));
}
private Map<String, Object> buildDocWithChangeDescription(Object newValue) {
Map<String, Object> fieldChange = new HashMap<>();
fieldChange.put("name", "deleted");

View file

@ -694,6 +694,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -702,6 +702,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -615,6 +615,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -731,6 +734,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -786,6 +786,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -955,6 +958,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -276,6 +276,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -682,6 +685,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -269,6 +269,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -936,6 +939,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -488,6 +488,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -790,6 +793,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -259,6 +259,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -823,6 +826,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -646,6 +646,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -772,6 +775,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -698,6 +698,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -824,6 +827,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -702,6 +702,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -592,6 +592,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -697,6 +700,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -259,6 +259,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -831,6 +834,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -304,6 +304,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -738,6 +741,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -702,6 +702,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -399,6 +399,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -793,6 +796,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -687,6 +687,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -813,6 +816,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -695,6 +695,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -811,6 +814,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -844,6 +844,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -1055,6 +1058,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -219,41 +219,7 @@
}
},
"users": {
"properties": {
"id": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 36
}
}
},
"type": {
"type": "keyword"
},
"name": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullyQualifiedName": {
"type": "text"
},
"description": {
"type": "text"
},
"deleted": {
"type": "boolean"
},
"href": {
"type": "text"
}
}
"enabled": false
},
"userCount": {
"type": "long"
@ -296,41 +262,7 @@
}
},
"defaultRoles": {
"properties": {
"id": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 36
}
}
},
"type": {
"type": "keyword"
},
"name": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullyQualifiedName": {
"type": "text"
},
"description": {
"type": "text"
},
"deleted": {
"type": "boolean"
},
"href": {
"type": "text"
}
}
"enabled": false
},
"isJoinable": {
"type": "text"

View file

@ -649,6 +649,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -818,6 +821,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -811,6 +811,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -937,6 +940,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -699,6 +699,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -707,6 +707,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -594,6 +594,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -704,6 +707,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -773,6 +773,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -918,6 +921,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -253,6 +253,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -710,6 +713,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -256,6 +256,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -858,6 +861,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -254,6 +254,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -743,6 +746,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -256,6 +256,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -805,6 +808,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -678,6 +678,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -754,6 +757,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -693,6 +693,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -769,6 +772,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -707,6 +707,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -588,6 +588,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -681,6 +684,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -255,6 +255,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -807,6 +810,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -302,6 +302,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -695,6 +698,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -707,6 +707,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -594,6 +594,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -780,6 +783,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -678,6 +678,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -754,6 +757,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -771,6 +771,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -887,6 +890,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -686,6 +686,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -1036,6 +1039,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -211,41 +211,7 @@
}
},
"users": {
"properties": {
"id": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 36
}
}
},
"type": {
"type": "keyword"
},
"name": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullyQualifiedName": {
"type": "text"
},
"description": {
"type": "text"
},
"deleted": {
"type": "boolean"
},
"href": {
"type": "text"
}
}
"enabled": false
},
"userCount": {
"type": "long"
@ -288,41 +254,7 @@
}
},
"defaultRoles": {
"properties": {
"id": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 36
}
}
},
"type": {
"type": "keyword"
},
"name": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullyQualifiedName": {
"type": "text"
},
"description": {
"type": "text"
},
"deleted": {
"type": "boolean"
},
"href": {
"type": "text"
}
}
"enabled": false
},
"isJoinable": {
"type": "text"

View file

@ -255,6 +255,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -790,6 +793,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -751,6 +751,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -827,6 +830,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -713,6 +713,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -721,6 +721,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -632,6 +632,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -748,6 +751,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -803,6 +803,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -972,6 +975,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -295,6 +295,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -699,6 +702,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -279,6 +279,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -909,6 +912,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -505,6 +505,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -762,6 +765,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -278,6 +278,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -840,6 +843,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -579,6 +579,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -655,6 +658,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -634,6 +634,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -710,6 +713,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -721,6 +721,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -562,6 +562,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -667,6 +670,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -278,6 +278,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -848,6 +851,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -322,6 +322,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -755,6 +758,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -721,6 +721,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -421,6 +421,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -810,6 +813,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -633,6 +633,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -709,6 +712,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -712,6 +712,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -828,6 +831,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -852,6 +852,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -1042,6 +1045,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -238,41 +238,7 @@
}
},
"users": {
"properties": {
"id": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 36
}
}
},
"type": {
"type": "keyword"
},
"name": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullyQualifiedName": {
"type": "text"
},
"description": {
"type": "text"
},
"deleted": {
"type": "boolean"
},
"href": {
"type": "text"
}
}
"enabled": false
},
"userCount": {
"type": "long"
@ -315,41 +281,7 @@
}
},
"defaultRoles": {
"properties": {
"id": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 36
}
}
},
"type": {
"type": "keyword"
},
"name": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullyQualifiedName": {
"type": "text"
},
"description": {
"type": "text"
},
"deleted": {
"type": "boolean"
},
"href": {
"type": "text"
}
}
"enabled": false
},
"isJoinable": {
"type": "text"

View file

@ -666,6 +666,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -835,6 +838,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -693,6 +693,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -769,6 +772,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -689,6 +689,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -697,6 +697,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -588,6 +588,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -704,6 +707,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -774,6 +774,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -919,6 +922,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -253,6 +253,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -696,6 +699,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -260,6 +260,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -860,6 +863,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -318,6 +318,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -741,6 +744,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -256,6 +256,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -762,6 +765,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -651,6 +651,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -727,6 +730,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -666,6 +666,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -742,6 +745,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -697,6 +697,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

View file

@ -592,6 +592,9 @@
"ignore_above": 512
}
}
},
"sqlQueryKey": {
"type": "keyword"
}
}
},
@ -673,6 +676,10 @@
"ownerName": {
"type": "keyword",
"normalizer": "lowercase_normalizer"
},
"lineageSqlQueries": {
"type": "object",
"enabled": false
}
}
}

Some files were not shown because too many files have changed in this diff Show more