mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
fix(rdf): schema default + migration force entities=[all] for safe full reindex
- rdfIndexingAppConfig.json: flip recreateIndex.default from false to true so any UI form / config generation path that surfaces the schema default agrees with the install JSON files and the new full-rebuild semantics. - 2.0.1 migration (MySQL + Postgres): in addition to flipping recreateIndex=true and the weekly Saturday cron, also rewrite appConfiguration.entities to ["all"]. Pre-upgrade an operator could have narrowed RDF indexing to a subset of entity types; the new recreateIndex=true semantics issues CLEAR ALL before indexing, which would otherwise wipe triples for excluded entity types and leave the graph permanently missing them. Forcing entities back to ["all"] ensures the post-CLEAR-ALL run repopulates the graph fully. Operators can re-narrow after the migration if they need partial indexing. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7a1fae7f89
commit
e2575d51ab
3 changed files with 36 additions and 13 deletions
|
|
@ -1,15 +1,26 @@
|
|||
-- Post data migration script for Task workflow cutover - OpenMetadata 2.0.1
|
||||
|
||||
-- RdfIndexApp: switch to weekly Saturday cron and recreate-on-each-run.
|
||||
-- RdfIndexApp: switch to weekly Saturday cron and full-rebuild every run.
|
||||
-- Previous defaults (daily, incremental) were producing unbounded triple growth
|
||||
-- because relationship-removal paths weren't fully reconciled. With per-run
|
||||
-- CLEAR ALL the dataset always converges to the current MySQL state; weekly
|
||||
-- cadence keeps the per-run cost from saturating Fuseki.
|
||||
-- CLEAR ALL the dataset always converges to MySQL state; weekly cadence keeps
|
||||
-- per-run cost from saturating Fuseki.
|
||||
--
|
||||
-- Also rewrite `entities` to `["all"]`. Pre-upgrade, an operator could have
|
||||
-- narrowed RDF indexing to a subset of entity types; the new recreateIndex=true
|
||||
-- semantics issues a CLEAR ALL before indexing, which would otherwise wipe
|
||||
-- triples for entity types still in MySQL but missing from the subset list.
|
||||
-- Forcing the subset list back to `["all"]` ensures the post-CLEAR-ALL run
|
||||
-- repopulates the graph fully; operators can re-narrow after the migration if
|
||||
-- they need partial indexing.
|
||||
UPDATE installed_apps
|
||||
SET json = JSON_SET(
|
||||
json,
|
||||
'$.appConfiguration.recreateIndex', CAST('true' AS JSON),
|
||||
'$.appSchedule.cronExpression', '0 0 * * 6'
|
||||
JSON_SET(
|
||||
json,
|
||||
'$.appConfiguration.recreateIndex', CAST('true' AS JSON),
|
||||
'$.appSchedule.cronExpression', '0 0 * * 6'
|
||||
),
|
||||
'$.appConfiguration.entities', JSON_ARRAY('all')
|
||||
)
|
||||
WHERE name = 'RdfIndexApp';
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,27 @@
|
|||
-- Post data migration script for Task workflow cutover - OpenMetadata 2.0.1
|
||||
|
||||
-- RdfIndexApp: switch to weekly Saturday cron and recreate-on-each-run.
|
||||
-- RdfIndexApp: switch to weekly Saturday cron and full-rebuild every run.
|
||||
-- Previous defaults (daily, incremental) were producing unbounded triple growth
|
||||
-- because relationship-removal paths weren't fully reconciled. With per-run
|
||||
-- CLEAR ALL the dataset always converges to the current MySQL state; weekly
|
||||
-- cadence keeps the per-run cost from saturating Fuseki.
|
||||
-- CLEAR ALL the dataset always converges to the database state; weekly cadence keeps
|
||||
-- per-run cost from saturating Fuseki.
|
||||
--
|
||||
-- Also rewrite `entities` to `["all"]`. Pre-upgrade, an operator could have
|
||||
-- narrowed RDF indexing to a subset of entity types; the new recreateIndex=true
|
||||
-- semantics issues a CLEAR ALL before indexing, which would otherwise wipe
|
||||
-- triples for entity types still in the database but missing from the subset list.
|
||||
-- Forcing the subset list back to `["all"]` ensures the post-CLEAR-ALL run
|
||||
-- repopulates the graph fully; operators can re-narrow after the migration if
|
||||
-- they need partial indexing.
|
||||
UPDATE installed_apps
|
||||
SET json = jsonb_set(
|
||||
jsonb_set(json::jsonb, '{appConfiguration,recreateIndex}', 'true'),
|
||||
'{appSchedule,cronExpression}',
|
||||
'"0 0 * * 6"'
|
||||
jsonb_set(
|
||||
jsonb_set(json::jsonb, '{appConfiguration,recreateIndex}', 'true'),
|
||||
'{appSchedule,cronExpression}',
|
||||
'"0 0 * * 6"'
|
||||
),
|
||||
'{appConfiguration,entities}',
|
||||
'["all"]'::jsonb
|
||||
)
|
||||
WHERE name = 'RdfIndexApp';
|
||||
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@
|
|||
"title": "Recreate RDF Store",
|
||||
"description": "Recreate the RDF store before indexing.",
|
||||
"type": "boolean",
|
||||
"default": false
|
||||
"default": true
|
||||
},
|
||||
"batchSize": {
|
||||
"title": "Batch Size",
|
||||
|
|
|
|||
Loading…
Reference in a new issue