fix(rdf): schema default + migration force entities=[all] for safe full reindex

- rdfIndexingAppConfig.json: flip recreateIndex.default from false to true so
  any UI form / config generation path that surfaces the schema default agrees
  with the install JSON files and the new full-rebuild semantics.

- 2.0.1 migration (MySQL + Postgres): in addition to flipping recreateIndex=true
  and the weekly Saturday cron, also rewrite appConfiguration.entities to
  ["all"]. Pre-upgrade, an operator could have narrowed RDF indexing to a subset
  of entity types; the new recreateIndex=true semantics issues a CLEAR ALL before
  indexing, which would otherwise wipe triples for excluded entity types and
  leave the graph permanently missing them. Forcing entities back to ["all"]
  ensures the post-CLEAR-ALL run repopulates the graph fully. Operators can
  re-narrow after the migration if they need partial indexing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sriharsha Chintalapani 2026-05-15 09:49:48 -07:00
parent 7a1fae7f89
commit e2575d51ab
3 changed files with 36 additions and 13 deletions

View file

@ -1,15 +1,26 @@
-- Post data migration script for Task workflow cutover - OpenMetadata 2.0.1
-- RdfIndexApp: switch to weekly Saturday cron and full-rebuild every run.
-- Previous defaults (daily, incremental) were producing unbounded triple growth
-- because relationship-removal paths weren't fully reconciled. With per-run
-- CLEAR ALL the dataset always converges to the current MySQL state; weekly
-- cadence keeps the per-run cost from saturating Fuseki.
--
-- Also rewrite `entities` to `["all"]`. Pre-upgrade, an operator could have
-- narrowed RDF indexing to a subset of entity types; the new recreateIndex=true
-- semantics issues a CLEAR ALL before indexing, which would otherwise wipe
-- triples for entity types still in MySQL but missing from the subset list.
-- Forcing the subset list back to `["all"]` ensures the post-CLEAR-ALL run
-- repopulates the graph fully; operators can re-narrow after the migration if
-- they need partial indexing.
-- Reconfigure the installed RdfIndexApp row in a single pass:
--   * appConfiguration.recreateIndex -> JSON boolean true (the CAST keeps it
--     from being stored as the string 'true')
--   * appSchedule.cronExpression     -> '0 0 * * 6' (00:00 every Saturday)
--   * appConfiguration.entities      -> ["all"]
-- JSON_SET applies its path/value pairs left to right, so one call with three
-- pairs is equivalent to wrapping one JSON_SET inside another.
UPDATE installed_apps
SET json = JSON_SET(
    json,
    '$.appConfiguration.recreateIndex', CAST('true' AS JSON),
    '$.appSchedule.cronExpression', '0 0 * * 6',
    '$.appConfiguration.entities', JSON_ARRAY('all')
)
WHERE name = 'RdfIndexApp';

View file

@ -1,15 +1,27 @@
-- Post data migration script for Task workflow cutover - OpenMetadata 2.0.1
-- RdfIndexApp: switch to weekly Saturday cron and full-rebuild every run.
-- Previous defaults (daily, incremental) were producing unbounded triple growth
-- because relationship-removal paths weren't fully reconciled. With per-run
-- CLEAR ALL the dataset always converges to the current database state; weekly
-- cadence keeps the per-run cost from saturating Fuseki.
--
-- Also rewrite `entities` to `["all"]`. Pre-upgrade, an operator could have
-- narrowed RDF indexing to a subset of entity types; the new recreateIndex=true
-- semantics issues a CLEAR ALL before indexing, which would otherwise wipe
-- triples for entity types still in the database but missing from the subset list.
-- Forcing the subset list back to `["all"]` ensures the post-CLEAR-ALL run
-- repopulates the graph fully; operators can re-narrow after the migration if
-- they need partial indexing.
-- Reconfigure the installed RdfIndexApp row in a single pass:
--   * appConfiguration.recreateIndex -> jsonb boolean true
--   * appSchedule.cronExpression     -> "0 0 * * 6" (00:00 every Saturday)
--   * appConfiguration.entities      -> ["all"]
-- jsonb_set accepts one path per call, so the three writes are nested:
-- the innermost call runs first and each outer call edits its result.
UPDATE installed_apps
SET json = jsonb_set(
    jsonb_set(
        jsonb_set(
            json::jsonb,
            '{appConfiguration,recreateIndex}',
            'true'::jsonb
        ),
        '{appSchedule,cronExpression}',
        '"0 0 * * 6"'::jsonb
    ),
    '{appConfiguration,entities}',
    '["all"]'::jsonb
)
WHERE name = 'RdfIndexApp';

View file

@ -104,7 +104,7 @@
"title": "Recreate RDF Store",
"description": "Recreate the RDF store before indexing.",
"type": "boolean",
"default": false
"default": true
},
"batchSize": {
"title": "Batch Size",