diff --git a/bootstrap/sql/migrations/native/2.0.1/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/2.0.1/mysql/postDataMigrationSQLScript.sql index acda6e87865..c5d42fe2682 100644 --- a/bootstrap/sql/migrations/native/2.0.1/mysql/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/2.0.1/mysql/postDataMigrationSQLScript.sql @@ -1,15 +1,26 @@ -- Post data migration script for Task workflow cutover - OpenMetadata 2.0.1 --- RdfIndexApp: switch to weekly Saturday cron and recreate-on-each-run. +-- RdfIndexApp: switch to weekly Saturday cron and full-rebuild every run. -- Previous defaults (daily, incremental) were producing unbounded triple growth -- because relationship-removal paths weren't fully reconciled. With per-run --- CLEAR ALL the dataset always converges to the current MySQL state; weekly --- cadence keeps the per-run cost from saturating Fuseki. +-- CLEAR ALL the dataset always converges to MySQL state; weekly cadence keeps +-- per-run cost from saturating Fuseki. +-- +-- Also rewrite `entities` to `["all"]`. Pre-upgrade, an operator could have +-- narrowed RDF indexing to a subset of entity types; the new recreateIndex=true +-- semantics issues a CLEAR ALL before indexing, which would otherwise wipe +-- triples for entity types still in MySQL but missing from the subset list. +-- Forcing the subset list back to `["all"]` ensures the post-CLEAR-ALL run +-- repopulates the graph fully; operators can re-narrow after the migration if +-- they need partial indexing. 
UPDATE installed_apps SET json = JSON_SET( - json, - '$.appConfiguration.recreateIndex', CAST('true' AS JSON), - '$.appSchedule.cronExpression', '0 0 * * 6' + JSON_SET( + json, + '$.appConfiguration.recreateIndex', CAST('true' AS JSON), + '$.appSchedule.cronExpression', '0 0 * * 6' + ), + '$.appConfiguration.entities', JSON_ARRAY('all') ) WHERE name = 'RdfIndexApp'; diff --git a/bootstrap/sql/migrations/native/2.0.1/postgres/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/2.0.1/postgres/postDataMigrationSQLScript.sql index 1b1f278a4fc..9501907d1ac 100644 --- a/bootstrap/sql/migrations/native/2.0.1/postgres/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/2.0.1/postgres/postDataMigrationSQLScript.sql @@ -1,15 +1,27 @@ -- Post data migration script for Task workflow cutover - OpenMetadata 2.0.1 --- RdfIndexApp: switch to weekly Saturday cron and recreate-on-each-run. +-- RdfIndexApp: switch to weekly Saturday cron and full-rebuild every run. -- Previous defaults (daily, incremental) were producing unbounded triple growth -- because relationship-removal paths weren't fully reconciled. With per-run --- CLEAR ALL the dataset always converges to the current MySQL state; weekly --- cadence keeps the per-run cost from saturating Fuseki. +-- CLEAR ALL the dataset always converges to PostgreSQL state; weekly cadence keeps +-- per-run cost from saturating Fuseki. +-- +-- Also rewrite `entities` to `["all"]`. Pre-upgrade, an operator could have +-- narrowed RDF indexing to a subset of entity types; the new recreateIndex=true +-- semantics issues a CLEAR ALL before indexing, which would otherwise wipe +-- triples for entity types still in PostgreSQL but missing from the subset list. +-- Forcing the subset list back to `["all"]` ensures the post-CLEAR-ALL run +-- repopulates the graph fully; operators can re-narrow after the migration if +-- they need partial indexing.
UPDATE installed_apps SET json = jsonb_set( - jsonb_set(json::jsonb, '{appConfiguration,recreateIndex}', 'true'), - '{appSchedule,cronExpression}', - '"0 0 * * 6"' + jsonb_set( + jsonb_set(json::jsonb, '{appConfiguration,recreateIndex}', 'true'), + '{appSchedule,cronExpression}', + '"0 0 * * 6"' + ), + '{appConfiguration,entities}', + '["all"]'::jsonb ) WHERE name = 'RdfIndexApp'; diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/applications/configuration/internal/rdfIndexingAppConfig.json b/openmetadata-spec/src/main/resources/json/schema/entity/applications/configuration/internal/rdfIndexingAppConfig.json index e5f2f4d2801..285d10c1559 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/applications/configuration/internal/rdfIndexingAppConfig.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/applications/configuration/internal/rdfIndexingAppConfig.json @@ -104,7 +104,7 @@ "title": "Recreate RDF Store", "description": "Recreate the RDF store before indexing.", "type": "boolean", - "default": false + "default": true }, "batchSize": { "title": "Batch Size",