OpenMetadata/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql
Eugenio 483461a003
Add migrations to ensure PII are really enabled (#27921)
This is especially needed for instances that had already upgraded to 1.12.0 or later; those instances skipped the migration that was cherry-picked into 1.12.6.
2026-05-08 15:39:29 +00:00

125 lines
8.6 KiB
SQL

-- Drop the computeMetrics key from the source config of profiler ingestion
-- pipelines. Pipelines that do not carry the key are not touched.
UPDATE ingestion_pipeline_entity
SET
    json = JSON_REMOVE(json, '$.sourceConfig.config.computeMetrics')
WHERE
    pipelineType = 'profiler'
    AND JSON_EXTRACT(json, '$.sourceConfig.config.computeMetrics') IS NOT NULL;
-- Turn randomizedSample off wherever it is currently stored as true (true was
-- the old default behavior). The flag is reset in four places: profiler
-- ingestion pipelines, tables, databases and database schemas.

-- 1) Profiler ingestion pipelines
UPDATE ingestion_pipeline_entity
SET
    json = JSON_SET(json, '$.sourceConfig.config.randomizedSample', false)
WHERE
    pipelineType = 'profiler'
    AND JSON_EXTRACT(json, '$.sourceConfig.config.randomizedSample') = true;

-- 2) Table-level profiler config
UPDATE table_entity
SET
    json = JSON_SET(json, '$.tableProfilerConfig.randomizedSample', false)
WHERE
    JSON_EXTRACT(json, '$.tableProfilerConfig.randomizedSample') = true;

-- 3) Database-level profiler config
UPDATE database_entity
SET
    json = JSON_SET(json, '$.databaseProfilerConfig.randomizedSample', false)
WHERE
    JSON_EXTRACT(json, '$.databaseProfilerConfig.randomizedSample') = true;

-- 4) Schema-level profiler config
UPDATE database_schema_entity
SET
    json = JSON_SET(json, '$.databaseSchemaProfilerConfig.randomizedSample', false)
WHERE
    JSON_EXTRACT(json, '$.databaseSchemaProfilerConfig.randomizedSample') = true;
-- Hard-delete every ingestion pipeline whose service reference points at an
-- Iceberg database service. This must run before the service migration that
-- follows: once serviceType is rewritten, the 'Iceberg' filter below would no
-- longer match those services.
DELETE ipe
FROM ingestion_pipeline_entity AS ipe
INNER JOIN dbservice_entity AS dse
    ON dse.id = JSON_UNQUOTE(JSON_EXTRACT(ipe.json, '$.service.id'))
WHERE
    JSON_UNQUOTE(JSON_EXTRACT(ipe.json, '$.service.type')) = 'databaseService'
    AND dse.serviceType = 'Iceberg';
-- The Iceberg connector was removed: repoint Iceberg database services at
-- CustomDatabase. serviceType is a GENERATED column derived from json, so only
-- the json document is written; both the top-level serviceType and the
-- connection config type are rewritten in the same JSON_SET call.
UPDATE dbservice_entity
SET
    json = JSON_SET(
        json,
        '$.serviceType', 'CustomDatabase',
        '$.connection.config.type', 'CustomDatabase'
    )
WHERE
    serviceType = 'Iceberg';
-- Propagate the Iceberg -> CustomDatabase rename to the child entities of a
-- database service. For these tables serviceType exists only inside the JSON
-- blob (no generated column), so the filter reads it from json.

-- Databases
UPDATE database_entity
SET
    json = JSON_SET(json, '$.serviceType', 'CustomDatabase')
WHERE
    JSON_UNQUOTE(JSON_EXTRACT(json, '$.serviceType')) = 'Iceberg';

-- Database schemas
UPDATE database_schema_entity
SET
    json = JSON_SET(json, '$.serviceType', 'CustomDatabase')
WHERE
    JSON_UNQUOTE(JSON_EXTRACT(json, '$.serviceType')) = 'Iceberg';

-- Tables
UPDATE table_entity
SET
    json = JSON_SET(json, '$.serviceType', 'CustomDatabase')
WHERE
    JSON_UNQUOTE(JSON_EXTRACT(json, '$.serviceType')) = 'Iceberg';

-- Stored procedures
UPDATE stored_procedure_entity
SET
    json = JSON_SET(json, '$.serviceType', 'CustomDatabase')
WHERE
    JSON_UNQUOTE(JSON_EXTRACT(json, '$.serviceType')) = 'Iceberg';
-- Give existing glossary-term-to-glossary-term RELATED_TO relationships
-- (relation = 15) an explicit relationType. For backward compatibility, rows
-- with no json at all, or with json lacking relationType, default to
-- "relatedTo"; rows that already carry a relationType are left alone.
UPDATE entity_relationship
SET
    json = JSON_SET(COALESCE(json, '{}'), '$.relationType', 'relatedTo')
WHERE
    relation = 15
    AND fromEntity = 'glossaryTerm'
    AND toEntity = 'glossaryTerm'
    AND (JSON_EXTRACT(json, '$.relationType') IS NULL OR json IS NULL);
-- Seed the default glossary term relation settings (ten system-defined
-- relation types) only when no 'glossaryTermRelationSettings' row exists yet,
-- so any existing user customizations are preserved.
-- NOTE: the JSON literal must stay on a single line. A raw line break inside
-- one of its string values (e.g. a description) is an unescaped control
-- character, which is not valid JSON.
INSERT INTO openmetadata_settings (configType, json)
SELECT 'glossaryTermRelationSettings', '{"relationTypes":[{"name":"relatedTo","displayName":"Related To","description":"General association between terms that are conceptually connected.","rdfPredicate":"https://open-metadata.org/ontology/relatedTo","isSymmetric":true,"isTransitive":false,"isCrossGlossaryAllowed":true,"category":"associative","isSystemDefined":true,"color":"#1570ef"},{"name":"synonym","displayName":"Synonym","description":"Terms that have the same meaning and can be used interchangeably.","rdfPredicate":"http://www.w3.org/2004/02/skos/core#exactMatch","isSymmetric":true,"isTransitive":false,"isCrossGlossaryAllowed":true,"category":"equivalence","isSystemDefined":true,"color":"#b42318"},{"name":"antonym","displayName":"Antonym","description":"Terms that have opposite meanings.","rdfPredicate":"https://open-metadata.org/ontology/antonym","isSymmetric":true,"isTransitive":false,"isCrossGlossaryAllowed":true,"category":"associative","isSystemDefined":true,"color":"#b54708"},{"name":"broader","displayName":"Broader","description":"A more general term (hypernym).","inverseRelation":"narrower","rdfPredicate":"http://www.w3.org/2004/02/skos/core#broader","isSymmetric":false,"isTransitive":true,"isCrossGlossaryAllowed":true,"category":"hierarchical","isSystemDefined":true,"color":"#067647"},{"name":"narrower","displayName":"Narrower","description":"A more specific term (hyponym).","inverseRelation":"broader","rdfPredicate":"http://www.w3.org/2004/02/skos/core#narrower","isSymmetric":false,"isTransitive":true,"isCrossGlossaryAllowed":true,"category":"hierarchical","isSystemDefined":true,"color":"#4e5ba6"},{"name":"partOf","displayName":"Part Of","description":"This term is a part or component of another term.","inverseRelation":"hasPart","rdfPredicate":"https://open-metadata.org/ontology/partOf","isSymmetric":false,"isTransitive":false,"isCrossGlossaryAllowed":true,"category":"hierarchical","isSystemDefined":true,"color":"#026aa2"},{"name":"hasPart","displayName":"Has Part","description":"This term has the other term as a part or component.","inverseRelation":"partOf","rdfPredicate":"https://open-metadata.org/ontology/hasPart","isSymmetric":false,"isTransitive":false,"isCrossGlossaryAllowed":true,"category":"hierarchical","isSystemDefined":true,"color":"#155eef"},{"name":"calculatedFrom","displayName":"Calculated From","description":"This term/metric is calculated or derived from another term.","inverseRelation":"usedToCalculate","rdfPredicate":"https://open-metadata.org/ontology/calculatedFrom","isSymmetric":false,"isTransitive":false,"isCrossGlossaryAllowed":true,"category":"associative","isSystemDefined":true,"color":"#6938ef"},{"name":"usedToCalculate","displayName":"Used To Calculate","description":"This term is used in the calculation of another term.","inverseRelation":"calculatedFrom","rdfPredicate":"https://open-metadata.org/ontology/usedToCalculate","isSymmetric":false,"isTransitive":false,"isCrossGlossaryAllowed":true,"category":"associative","isSystemDefined":true,"color":"#ba24d5"},{"name":"seeAlso","displayName":"See Also","description":"Related term that may provide additional context.","rdfPredicate":"http://www.w3.org/2000/01/rdf-schema#seeAlso","isSymmetric":true,"isTransitive":false,"isCrossGlossaryAllowed":true,"category":"associative","isSystemDefined":true,"color":"#c11574"}]}'
WHERE NOT EXISTS (
    SELECT 1 FROM openmetadata_settings WHERE configType = 'glossaryTermRelationSettings'
);
-- relatedTerms is now loaded from the entity_relationship table rather than
-- from the glossary term's own JSON. The old embedded copies were stored as
-- EntityReference objects, which fail to deserialize as TermRelation, so the
-- stale key is removed from every glossary term that still has it.
UPDATE glossary_term_entity
SET
    json = JSON_REMOVE(json, '$.relatedTerms')
WHERE
    JSON_EXTRACT(json, '$.relatedTerms') IS NOT NULL;
-- Version snapshots in entity_extension are not handled here: the Java
-- migration migrateGlossaryTermVersionRelatedTermsToTermRelation transforms
-- them in place to preserve history.
-- Give every existing glossary term that has no conceptMappings key an empty
-- conceptMappings array. Terms that already have the key are left unchanged.
UPDATE glossary_term_entity
SET
    json = JSON_SET(
        COALESCE(json, '{}'),
        '$.conceptMappings',
        JSON_ARRAY()
    )
WHERE
    JSON_EXTRACT(json, '$.conceptMappings') IS NULL;
-- Add Container permissions to AutoClassificationBotPolicy so the bot can
-- auto-classify storage containers (tags and sample data).
-- The new rule is inserted at index 1 of $.rules; JSON_ARRAY_INSERT shifts any
-- later rules to the right and appends when the array is shorter than that.
-- Idempotency guard: the rule is added only when no entry in $.rules already
-- has this name. The guard searches every rule rather than comparing
-- $.rules[1].name with !=, because that comparison evaluates to NULL (and so
-- skips the row) when the policy has fewer than two rules, and it cannot see
-- the rule if it already sits at a different index.
UPDATE policy_entity
SET json = JSON_ARRAY_INSERT(
    json,
    '$.rules[1]',
    JSON_OBJECT(
        'name', 'AutoClassificationBotRule-Allow-Container',
        'description', 'Allow adding tags and sample data to the containers',
        'resources', JSON_ARRAY('Container'),
        'operations', JSON_ARRAY('EditAll', 'ViewAll'),
        'effect', 'allow'
    )
)
WHERE JSON_UNQUOTE(JSON_EXTRACT(json, '$.name')) = 'AutoClassificationBotPolicy'
  AND JSON_SEARCH(
          json,
          'one',
          'AutoClassificationBotRule-Allow-Container',
          NULL,
          '$.rules[*].name'
      ) IS NULL;
-- Issue #27910: make sure the PII classification has an
-- autoClassificationConfig. Only rows where autoClassificationConfig.enabled
-- is absent are touched; for those, the whole autoClassificationConfig object
-- is set to the enabled defaults below.
UPDATE classification
SET
    json = JSON_SET(
        json,
        '$.autoClassificationConfig',
        CAST('{"enabled": true, "conflictResolution": "highest_priority", "minimumConfidence": 0.6, "requireExplicitMatch": true}' AS JSON)
    )
WHERE
    JSON_EXTRACT(json, '$.autoClassificationConfig.enabled') IS NULL
    AND JSON_VALUE(json, '$.name' RETURNING CHAR) = 'PII';
-- Issue #27910: switch autoClassificationEnabled on for the Sensitive and
-- NonSensitive tags of the PII classification whenever the flag is missing or
-- currently false. The value is written as a JSON boolean.
UPDATE tag
SET
    json = JSON_SET(json, '$.autoClassificationEnabled', CAST('true' AS JSON))
WHERE
    (
        JSON_EXTRACT(json, '$.autoClassificationEnabled') IS NULL
        OR JSON_EXTRACT(json, '$.autoClassificationEnabled') = false
    )
    AND JSON_VALUE(json, '$.name' RETURNING CHAR) IN ('NonSensitive', 'Sensitive')
    AND JSON_VALUE(json, '$.classification.name' RETURNING CHAR) = 'PII';