Address PR #27558 Copilot review, round 3

- bootstrap/sql/migrations/native/2.0.0/mysql/schemaChanges.sql:
  - asset_entity: add PRIMARY KEY (id); mark all generated columns STORED
    for consistency with the other drive/knowledge tables in the same
    migration; compute deleted as a real boolean via
    IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE), so a missing
    '$.deleted' key yields FALSE instead of NULL and the boolean index
    behaves correctly.
  - knowledge_center: mark name, updatedAt, updatedBy, pageType as STORED
    and apply the same deleted expression so the existing indexes on
    name and (fqnHash, deleted) are reliable on fresh installs.
  - drive_folder / context_file / context_file_content: update the
    deleted generated column to use the same boolean-safe expression.
- Elasticsearch/OpenSearch hierarchy search: add an explicit sort on
  fullyQualifiedName ASC with _id ASC as tiebreaker so from/size
  pagination is deterministic and does not skip or duplicate hits
  between requests.
This commit is contained in:
Sriharsha Chintalapani 2026-04-20 20:13:30 -07:00
parent 80dfb3581c
commit 4a75852a7e
3 changed files with 83 additions and 14 deletions

View file

@ -8,12 +8,12 @@
CREATE TABLE IF NOT EXISTS knowledge_center (
id VARCHAR(36) GENERATED ALWAYS AS (json ->> '$.id') STORED NOT NULL,
fqnHash VARCHAR(756) NOT NULL COLLATE ascii_bin,
name VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.name') NOT NULL,
name VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.name') STORED NOT NULL,
json JSON NOT NULL,
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') NOT NULL,
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') NOT NULL,
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted'),
pageType VARCHAR(16) GENERATED ALWAYS AS (json ->> '$.pageType') NOT NULL,
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
pageType VARCHAR(16) GENERATED ALWAYS AS (json ->> '$.pageType') STORED NOT NULL,
PRIMARY KEY (id),
UNIQUE (fqnHash),
INDEX knowledge_center_name_index (name),
@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS drive_folder (
json JSON NOT NULL,
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted') STORED,
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
PRIMARY KEY (id),
UNIQUE KEY unique_drive_folder_name (nameHash),
INDEX idx_drive_folder_updated_at (updatedAt)
@ -42,7 +42,7 @@ CREATE TABLE IF NOT EXISTS context_file (
json JSON NOT NULL,
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted') STORED,
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
PRIMARY KEY (id),
UNIQUE KEY unique_context_file_name (nameHash),
INDEX idx_context_file_updated_at (updatedAt)
@ -52,15 +52,16 @@ CREATE TABLE IF NOT EXISTS context_file (
-- Existing Collate customers have this from 1.7.0-collate. CREATE TABLE IF NOT EXISTS is a no-op for them.
CREATE TABLE IF NOT EXISTS asset_entity (
id VARCHAR(36) GENERATED ALWAYS AS (json ->> '$.id') STORED NOT NULL,
name VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.fileName') NOT NULL,
name VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.fileName') STORED NOT NULL,
url VARCHAR(1024) GENERATED ALWAYS AS (json ->> '$.url') STORED NOT NULL,
fullyQualifiedName VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.fullyQualifiedName') NOT NULL,
assetType VARCHAR(100) GENERATED ALWAYS AS (json ->> '$.assetType') NOT NULL,
fullyQualifiedName VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.fullyQualifiedName') STORED NOT NULL,
assetType VARCHAR(100) GENERATED ALWAYS AS (json ->> '$.assetType') STORED NOT NULL,
json JSON NOT NULL,
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') NOT NULL,
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') NOT NULL,
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
fqnHash VARCHAR(768) CHARACTER SET ascii COLLATE ascii_bin DEFAULT NULL,
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted'),
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
PRIMARY KEY (id),
INDEX fqnhash_index (fqnHash),
INDEX asset_type_index (assetType),
INDEX idx_asset_deleted (deleted)
@ -74,7 +75,7 @@ CREATE TABLE IF NOT EXISTS context_file_content (
json JSON NOT NULL,
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted') STORED,
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
PRIMARY KEY (id),
UNIQUE KEY unique_context_file_content_name (nameHash),
INDEX idx_context_file_content_updated_at (updatedAt)

View file

@ -1081,6 +1081,23 @@ public class ElasticSearchClient implements SearchClient {
org.openmetadata.service.jdbi3.KnowledgePageRepository
.KNOWLEDGE_PAGE_TERM_SEARCH_INDEX))
.query(boolQuery)
// Stable sort so from/size pagination cannot miss/duplicate hits.
.sort(
sort ->
sort.field(
f ->
f.field("fullyQualifiedName")
.order(
es.co.elastic.clients.elasticsearch._types.SortOrder
.Asc)))
.sort(
sort ->
sort.field(
f ->
f.field("_id")
.order(
es.co.elastic.clients.elasticsearch._types.SortOrder
.Asc)))
.from(offset)
.size(limit));
@ -1114,6 +1131,23 @@ public class ElasticSearchClient implements SearchClient {
org.openmetadata.service.jdbi3.KnowledgePageRepository
.KNOWLEDGE_PAGE_TERM_SEARCH_INDEX))
.query(boolQuery)
// Stable sort so from/size pagination cannot miss/duplicate hits.
.sort(
sort ->
sort.field(
f ->
f.field("fullyQualifiedName")
.order(
es.co.elastic.clients.elasticsearch._types.SortOrder
.Asc)))
.sort(
sort ->
sort.field(
f ->
f.field("_id")
.order(
es.co.elastic.clients.elasticsearch._types.SortOrder
.Asc)))
.from(offset)
.size(limit));

View file

@ -1114,6 +1114,23 @@ public class OpenSearchClient implements SearchClient {
org.openmetadata.service.jdbi3.KnowledgePageRepository
.KNOWLEDGE_PAGE_TERM_SEARCH_INDEX))
.query(boolQuery)
// Stable sort so from/size pagination cannot miss/duplicate hits.
.sort(
sort ->
sort.field(
f ->
f.field("fullyQualifiedName")
.order(
os.org.opensearch.client.opensearch._types.SortOrder
.Asc)))
.sort(
sort ->
sort.field(
f ->
f.field("_id")
.order(
os.org.opensearch.client.opensearch._types.SortOrder
.Asc)))
.from(offset)
.size(limit));
@ -1148,6 +1165,23 @@ public class OpenSearchClient implements SearchClient {
org.openmetadata.service.jdbi3.KnowledgePageRepository
.KNOWLEDGE_PAGE_TERM_SEARCH_INDEX))
.query(boolQuery)
// Stable sort so from/size pagination cannot miss/duplicate hits.
.sort(
sort ->
sort.field(
f ->
f.field("fullyQualifiedName")
.order(
os.org.opensearch.client.opensearch._types.SortOrder
.Asc)))
.sort(
sort ->
sort.field(
f ->
f.field("_id")
.order(
os.org.opensearch.client.opensearch._types.SortOrder
.Asc)))
.from(offset)
.size(limit));