mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
Address PR review: post-state checks, FAILED listener, hermetic IT, InOrder
Five additional behavioral fixes from Copilot review on PR #27920. 1. delete-canonical and addAliases: detect failure via post-state, not via try/catch. ElasticSearchIndexManager#deleteIndexWithBackoff and #addAliasesInternal both swallow transport exceptions and return void (same shape on the OS side), so the existing try/catch could never observe a failed delete or alias-add. After deleteIndexWithBackoff, verify the canonical no longer exists; after addAliases, verify the canonical-name alias is actually attached. If either post-condition fails, log [ALIAS_PROMOTE_FAILED reason=delete-not-acknowledged] / [reason=alias-not-attached] and mark the promotion failed (data loss for the alias-not-attached path). 2. SearchIndexExecutor.executeSingleServer: wire onJobFailed for the new FAILED status. Previously the listener chain only had callbacks for COMPLETED / COMPLETED_WITH_ERRORS / STOPPED, so promotion-driven FAILED ended without populating jobData.failure or notifying observers. Pass in an IllegalStateException naming the data-loss entities so the app run record carries the right failure context. 3. SearchIndexAliasPromotionIT trigger payload: explicitly set liveIndexSettings (1s/1/request), liveIndexSettingsByEntity (empty), and useDistributedIndexing=true. /v1/apps/trigger/X merges the payload into the persisted config rather than replacing it, so without these the test could be affected by previous local config or silently exercise the single-server path. The hard-coded post-promotion assertions are now anchored to values the test itself supplies. 4. testPromoteEntityIndexForceMergesWhenConfigured: replace standalone verify() with InOrder(forceMerge → swapAliases) so a refactor that swaps aliases first and merges afterward fails the test instead of passing. All 148 unit tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4654158706
commit
9a7fa49494
4 changed files with 48 additions and 3 deletions
|
|
@ -206,11 +206,19 @@ public class SearchIndexAliasPromotionIT {
|
|||
bulk.put("translogDurability", "async");
|
||||
bulk.put("translogSyncInterval", "30s");
|
||||
|
||||
Map<String, Object> live = new HashMap<>();
|
||||
live.put("numberOfReplicas", 1);
|
||||
live.put("refreshInterval", "1s");
|
||||
live.put("translogDurability", "request");
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("entities", List.of("table"));
|
||||
config.put("recreateIndex", true);
|
||||
config.put("batchSize", 100);
|
||||
config.put("bulkIndexSettings", bulk);
|
||||
config.put("liveIndexSettings", live);
|
||||
config.put("liveIndexSettingsByEntity", new HashMap<String, Object>());
|
||||
config.put("useDistributedIndexing", true);
|
||||
|
||||
Awaitility.await("Trigger " + APP_NAME)
|
||||
.atMost(Duration.ofMinutes(2))
|
||||
|
|
|
|||
|
|
@ -1762,6 +1762,15 @@ public class SearchIndexExecutor implements AutoCloseable {
|
|||
listeners.onJobCompletedWithErrors(stats.get(), endTime - startTime);
|
||||
} else if (status == ExecutionResult.Status.STOPPED) {
|
||||
listeners.onJobStopped(stats.get());
|
||||
} else if (status == ExecutionResult.Status.FAILED) {
|
||||
Set<String> dataLoss =
|
||||
recreateIndexHandler != null ? recreateIndexHandler.getDataLossPromotions() : Set.of();
|
||||
listeners.onJobFailed(
|
||||
stats.get(),
|
||||
new IllegalStateException(
|
||||
"Promotion data loss for entities: "
|
||||
+ dataLoss
|
||||
+ ". Canonical index was deleted but alias not re-attached."));
|
||||
}
|
||||
|
||||
return ExecutionResult.fromStats(stats.get(), status, startTime);
|
||||
|
|
|
|||
|
|
@ -322,12 +322,17 @@ public class DefaultRecreateHandler implements RecreateIndexHandler {
|
|||
}
|
||||
}
|
||||
|
||||
// deleteIndexWithBackoff and addAliases swallow transport exceptions and return void in
|
||||
// both ES and OS clients (ElasticSearchIndexManager#deleteIndexWithBackoff,
|
||||
// ElasticSearchIndexManager#addAliasesInternal — same shape on the OS side). A try/catch
|
||||
// alone cannot detect failure. Verify outcome via post-state checks: the index is gone,
|
||||
// the alias is attached.
|
||||
if (needsCanonicalAlias && searchClient.indexExists(canonicalIndex)) {
|
||||
try {
|
||||
searchClient.deleteIndexWithBackoff(canonicalIndex);
|
||||
} catch (Exception ex) {
|
||||
LOG.error(
|
||||
"[ALIAS_PROMOTE_FAILED phase=delete-canonical entity={} stagedIndex={} canonicalIndex={}] "
|
||||
"[ALIAS_PROMOTE_FAILED phase=delete-canonical entity={} stagedIndex={} canonicalIndex={} reason=exception] "
|
||||
+ "Parent aliases on staged; canonical-name lookups still hit old index until retry.",
|
||||
entityType,
|
||||
stagedIndex,
|
||||
|
|
@ -336,6 +341,16 @@ public class DefaultRecreateHandler implements RecreateIndexHandler {
|
|||
markPromotionFailed(entityType, false);
|
||||
return false;
|
||||
}
|
||||
if (searchClient.indexExists(canonicalIndex)) {
|
||||
LOG.error(
|
||||
"[ALIAS_PROMOTE_FAILED phase=delete-canonical entity={} stagedIndex={} canonicalIndex={} reason=delete-not-acknowledged] "
|
||||
+ "Client did not throw, but canonical index still exists. Old index keeps serving canonical-name lookups.",
|
||||
entityType,
|
||||
stagedIndex,
|
||||
canonicalIndex);
|
||||
markPromotionFailed(entityType, false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (needsCanonicalAlias) {
|
||||
|
|
@ -343,7 +358,7 @@ public class DefaultRecreateHandler implements RecreateIndexHandler {
|
|||
searchClient.addAliases(stagedIndex, Set.of(canonicalIndex));
|
||||
} catch (Exception ex) {
|
||||
LOG.error(
|
||||
"[ALIAS_PROMOTE_FAILED phase=swap2 entity={} stagedIndex={} canonicalIndex={}] "
|
||||
"[ALIAS_PROMOTE_FAILED phase=swap2 entity={} stagedIndex={} canonicalIndex={} reason=exception] "
|
||||
+ "DATA UNAVAILABLE: canonical was deleted but canonical-name alias add failed. "
|
||||
+ "Parent aliases work; canonical-name lookups will 404 until manual repair.",
|
||||
entityType,
|
||||
|
|
@ -353,6 +368,17 @@ public class DefaultRecreateHandler implements RecreateIndexHandler {
|
|||
markPromotionFailed(entityType, true);
|
||||
return false;
|
||||
}
|
||||
if (!searchClient.getAliases(stagedIndex).contains(canonicalIndex)) {
|
||||
LOG.error(
|
||||
"[ALIAS_PROMOTE_FAILED phase=swap2 entity={} stagedIndex={} canonicalIndex={} reason=alias-not-attached] "
|
||||
+ "DATA UNAVAILABLE: client did not throw, but canonical-name alias is not on staged index. "
|
||||
+ "Parent aliases work; canonical-name lookups will 404 until manual repair.",
|
||||
entityType,
|
||||
stagedIndex,
|
||||
canonicalIndex);
|
||||
markPromotionFailed(entityType, true);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
LOG.info(
|
||||
|
|
|
|||
|
|
@ -495,7 +495,9 @@ class DefaultRecreateHandlerTest {
|
|||
new DefaultRecreateHandler().withJobData(jobData).promoteEntityIndex(context, true);
|
||||
}
|
||||
|
||||
verify(client).forceMerge("table_search_index_rebuild_new", 1);
|
||||
org.mockito.InOrder order = org.mockito.Mockito.inOrder(client);
|
||||
order.verify(client).forceMerge("table_search_index_rebuild_new", 1);
|
||||
order.verify(client).swapAliases(anySet(), eq("table_search_index_rebuild_new"), anySet());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
Loading…
Reference in a new issue