mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
Address PR #27558 Copilot review feedback (round 3)
- bootstrap/sql/migrations/native/2.0.0/mysql/schemaChanges.sql:
  - asset_entity: add PRIMARY KEY (id); mark all generated columns STORED
    for consistency with the other drive/knowledge tables in the same
    migration; compute deleted as a real boolean via
    IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE), so that a missing
    '$.deleted' key yields FALSE instead of NULL and the boolean index
    behaves correctly.
  - knowledge_center: mark name, updatedAt, updatedBy, and pageType as
    STORED and apply the same deleted expression so that the existing
    indexes on name and (fqnHash, deleted) are reliable on fresh installs.
  - drive_folder / context_file / context_file_content: update the
    deleted generated column to use the same boolean-safe expression.
- Elasticsearch/OpenSearch hierarchy search: add an explicit sort on
  fullyQualifiedName ASC, with _id ASC as the tiebreaker, so that from/size
  pagination is deterministic and cannot skip or duplicate hits between
  requests.
This commit is contained in:
parent
80dfb3581c
commit
4a75852a7e
3 changed files with 83 additions and 14 deletions
|
|
@ -8,12 +8,12 @@
|
|||
CREATE TABLE IF NOT EXISTS knowledge_center (
|
||||
id VARCHAR(36) GENERATED ALWAYS AS (json ->> '$.id') STORED NOT NULL,
|
||||
fqnHash VARCHAR(756) NOT NULL COLLATE ascii_bin,
|
||||
name VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.name') NOT NULL,
|
||||
name VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.name') STORED NOT NULL,
|
||||
json JSON NOT NULL,
|
||||
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') NOT NULL,
|
||||
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') NOT NULL,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted'),
|
||||
pageType VARCHAR(16) GENERATED ALWAYS AS (json ->> '$.pageType') NOT NULL,
|
||||
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
|
||||
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
|
||||
pageType VARCHAR(16) GENERATED ALWAYS AS (json ->> '$.pageType') STORED NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (fqnHash),
|
||||
INDEX knowledge_center_name_index (name),
|
||||
|
|
@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS drive_folder (
|
|||
json JSON NOT NULL,
|
||||
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
|
||||
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted') STORED,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE KEY unique_drive_folder_name (nameHash),
|
||||
INDEX idx_drive_folder_updated_at (updatedAt)
|
||||
|
|
@ -42,7 +42,7 @@ CREATE TABLE IF NOT EXISTS context_file (
|
|||
json JSON NOT NULL,
|
||||
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
|
||||
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted') STORED,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE KEY unique_context_file_name (nameHash),
|
||||
INDEX idx_context_file_updated_at (updatedAt)
|
||||
|
|
@ -52,15 +52,16 @@ CREATE TABLE IF NOT EXISTS context_file (
|
|||
-- Existing Collate customers have this from 1.7.0-collate. CREATE TABLE IF NOT EXISTS is a no-op for them.
|
||||
CREATE TABLE IF NOT EXISTS asset_entity (
|
||||
id VARCHAR(36) GENERATED ALWAYS AS (json ->> '$.id') STORED NOT NULL,
|
||||
name VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.fileName') NOT NULL,
|
||||
name VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.fileName') STORED NOT NULL,
|
||||
url VARCHAR(1024) GENERATED ALWAYS AS (json ->> '$.url') STORED NOT NULL,
|
||||
fullyQualifiedName VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.fullyQualifiedName') NOT NULL,
|
||||
assetType VARCHAR(100) GENERATED ALWAYS AS (json ->> '$.assetType') NOT NULL,
|
||||
fullyQualifiedName VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.fullyQualifiedName') STORED NOT NULL,
|
||||
assetType VARCHAR(100) GENERATED ALWAYS AS (json ->> '$.assetType') STORED NOT NULL,
|
||||
json JSON NOT NULL,
|
||||
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') NOT NULL,
|
||||
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') NOT NULL,
|
||||
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
|
||||
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
|
||||
fqnHash VARCHAR(768) CHARACTER SET ascii COLLATE ascii_bin DEFAULT NULL,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted'),
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
|
||||
PRIMARY KEY (id),
|
||||
INDEX fqnhash_index (fqnHash),
|
||||
INDEX asset_type_index (assetType),
|
||||
INDEX idx_asset_deleted (deleted)
|
||||
|
|
@ -74,7 +75,7 @@ CREATE TABLE IF NOT EXISTS context_file_content (
|
|||
json JSON NOT NULL,
|
||||
updatedAt BIGINT UNSIGNED GENERATED ALWAYS AS (json ->> '$.updatedAt') STORED NOT NULL,
|
||||
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> '$.updatedBy') STORED NOT NULL,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (json -> '$.deleted') STORED,
|
||||
deleted BOOLEAN GENERATED ALWAYS AS (IFNULL(JSON_EXTRACT(json, '$.deleted'), FALSE)) STORED,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE KEY unique_context_file_content_name (nameHash),
|
||||
INDEX idx_context_file_content_updated_at (updatedAt)
|
||||
|
|
|
|||
|
|
@ -1081,6 +1081,23 @@ public class ElasticSearchClient implements SearchClient {
|
|||
org.openmetadata.service.jdbi3.KnowledgePageRepository
|
||||
.KNOWLEDGE_PAGE_TERM_SEARCH_INDEX))
|
||||
.query(boolQuery)
|
||||
// Stable sort so from/size pagination cannot miss/duplicate hits.
|
||||
.sort(
|
||||
sort ->
|
||||
sort.field(
|
||||
f ->
|
||||
f.field("fullyQualifiedName")
|
||||
.order(
|
||||
es.co.elastic.clients.elasticsearch._types.SortOrder
|
||||
.Asc)))
|
||||
.sort(
|
||||
sort ->
|
||||
sort.field(
|
||||
f ->
|
||||
f.field("_id")
|
||||
.order(
|
||||
es.co.elastic.clients.elasticsearch._types.SortOrder
|
||||
.Asc)))
|
||||
.from(offset)
|
||||
.size(limit));
|
||||
|
||||
|
|
@ -1114,6 +1131,23 @@ public class ElasticSearchClient implements SearchClient {
|
|||
org.openmetadata.service.jdbi3.KnowledgePageRepository
|
||||
.KNOWLEDGE_PAGE_TERM_SEARCH_INDEX))
|
||||
.query(boolQuery)
|
||||
// Stable sort so from/size pagination cannot miss/duplicate hits.
|
||||
.sort(
|
||||
sort ->
|
||||
sort.field(
|
||||
f ->
|
||||
f.field("fullyQualifiedName")
|
||||
.order(
|
||||
es.co.elastic.clients.elasticsearch._types.SortOrder
|
||||
.Asc)))
|
||||
.sort(
|
||||
sort ->
|
||||
sort.field(
|
||||
f ->
|
||||
f.field("_id")
|
||||
.order(
|
||||
es.co.elastic.clients.elasticsearch._types.SortOrder
|
||||
.Asc)))
|
||||
.from(offset)
|
||||
.size(limit));
|
||||
|
||||
|
|
|
|||
|
|
@ -1114,6 +1114,23 @@ public class OpenSearchClient implements SearchClient {
|
|||
org.openmetadata.service.jdbi3.KnowledgePageRepository
|
||||
.KNOWLEDGE_PAGE_TERM_SEARCH_INDEX))
|
||||
.query(boolQuery)
|
||||
// Stable sort so from/size pagination cannot miss/duplicate hits.
|
||||
.sort(
|
||||
sort ->
|
||||
sort.field(
|
||||
f ->
|
||||
f.field("fullyQualifiedName")
|
||||
.order(
|
||||
os.org.opensearch.client.opensearch._types.SortOrder
|
||||
.Asc)))
|
||||
.sort(
|
||||
sort ->
|
||||
sort.field(
|
||||
f ->
|
||||
f.field("_id")
|
||||
.order(
|
||||
os.org.opensearch.client.opensearch._types.SortOrder
|
||||
.Asc)))
|
||||
.from(offset)
|
||||
.size(limit));
|
||||
|
||||
|
|
@ -1148,6 +1165,23 @@ public class OpenSearchClient implements SearchClient {
|
|||
org.openmetadata.service.jdbi3.KnowledgePageRepository
|
||||
.KNOWLEDGE_PAGE_TERM_SEARCH_INDEX))
|
||||
.query(boolQuery)
|
||||
// Stable sort so from/size pagination cannot miss/duplicate hits.
|
||||
.sort(
|
||||
sort ->
|
||||
sort.field(
|
||||
f ->
|
||||
f.field("fullyQualifiedName")
|
||||
.order(
|
||||
os.org.opensearch.client.opensearch._types.SortOrder
|
||||
.Asc)))
|
||||
.sort(
|
||||
sort ->
|
||||
sort.field(
|
||||
f ->
|
||||
f.field("_id")
|
||||
.order(
|
||||
os.org.opensearch.client.opensearch._types.SortOrder
|
||||
.Asc)))
|
||||
.from(offset)
|
||||
.size(limit));
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue