fix: health check doesn't detect resource deletion during sync operation (#26286)

Signed-off-by: Christopher Coco <ccoco@redhat.com>
Co-authored-by: Alexandre Gaudreault <alexandre_gaudreault@intuit.com>
This commit is contained in:
Christopher Coco 2026-03-11 17:14:30 -04:00 committed by GitHub
parent 4c59c3c7ec
commit f8c0d7efcf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 132 additions and 0 deletions

View file

@ -531,6 +531,22 @@ func (sc *syncContext) Sync() {
multiStep := tasks.multiStep()
runningTasks := tasks.Filter(func(t *syncTask) bool { return (multiStep || t.isHook()) && t.running() })
if runningTasks.Len() > 0 {
// Check whether the resource of any running task is missing, to prevent an infinite loop of waiting for it to become healthy.
for _, task := range runningTasks {
if task.liveObj == nil {
liveObj, err := sc.getResource(task)
if err != nil && !apierrors.IsNotFound(err) {
sc.setResourceResult(task, task.syncStatus, common.OperationError, fmt.Sprintf("Failed to get live resource %v", err))
continue
}
if liveObj != nil {
continue
}
sc.setResourceResult(task, common.ResultCodeSyncFailed, common.OperationError, fmt.Sprintf("Resource %s/%s/%s is missing, it might have been deleted", task.group(), task.kind(), task.name()))
}
}
sc.setRunningPhase(runningTasks, false)
return
}

View file

@ -111,6 +111,17 @@ func TestSyncNamespaceCreatedBeforeDryRunWithoutFailure(t *testing.T) {
Live: []*unstructured.Unstructured{nil, nil},
Target: []*unstructured.Unstructured{pod},
})
ns := &unstructured.Unstructured{}
ns.SetGroupVersionKind(schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Namespace"})
ns.SetName(testingutils.FakeArgoCDNamespace)
fakeDynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
fakeDynamicClient.PrependReactor("get", "namespaces", func(action testcore.Action) (bool, runtime.Object, error) {
return true, ns, nil
})
syncCtx.dynamicIf = fakeDynamicClient
syncCtx.Sync()
phase, msg, resources := syncCtx.GetState()
assert.Equal(t, synccommon.OperationRunning, phase)
@ -348,6 +359,102 @@ func TestSyncSuccessfully_Multistep(t *testing.T) {
assert.Len(t, resources, 2)
}
// TestSync_MultistepResourceDeletionMidstep runs Sync twice over two pods
// placed in sync waves 1 and 2, swapping syncCtx.resources between the two
// calls, and checks the status and hook phase reported for pod-1 after the
// second call.
//
// Each table entry supplies the reconciled resources seen by the first Sync
// (resourcesStart), the ones seen by the second Sync (resourcesChange), the
// expected pod-1 result, and whether the fake dynamic client answers "get pods"
// with pod1 (clientGet).
func TestSync_MultistepResourceDeletionMidstep(t *testing.T) {
	pod1 := testingutils.NewPod()
	pod1.SetName("pod-1")
	pod1.SetNamespace("fake-argocd-ns")
	pod1.SetAnnotations(map[string]string{synccommon.AnnotationSyncWave: "1"})

	pod2 := testingutils.NewPod()
	pod2.SetName("pod-2")
	pod2.SetNamespace("fake-argocd-ns")
	pod2.SetAnnotations(map[string]string{synccommon.AnnotationSyncWave: "2"})

	tests := []struct {
		name              string
		resourcesStart    map[kube.ResourceKey]reconciledResource
		resourcesChange   map[kube.ResourceKey]reconciledResource
		statusExpected    synccommon.ResultCode
		hookPhaseExpected synccommon.OperationPhase
		clientGet         bool
	}{
		{
			// pod1 is live for the first Sync and gone for the second one.
			name: "resource deleted during multistep",
			resourcesStart: groupResources(ReconciliationResult{
				Live:   []*unstructured.Unstructured{pod1, pod2},
				Target: []*unstructured.Unstructured{pod1, pod2},
			}),
			resourcesChange: groupResources(ReconciliationResult{
				Live:   []*unstructured.Unstructured{nil, pod2},
				Target: []*unstructured.Unstructured{pod1, pod2},
			}),
			statusExpected:    synccommon.ResultCodeSyncFailed,
			hookPhaseExpected: synccommon.OperationError,
			clientGet:         false,
		},
		{
			// pod1 has no live object for the first Sync and has one for the second.
			name: "no false positive on resource creation",
			resourcesStart: groupResources(ReconciliationResult{
				Live:   []*unstructured.Unstructured{nil, pod2},
				Target: []*unstructured.Unstructured{pod1, pod2},
			}),
			resourcesChange: groupResources(ReconciliationResult{
				Live:   []*unstructured.Unstructured{pod1, pod2},
				Target: []*unstructured.Unstructured{pod1, pod2},
			}),
			statusExpected:    synccommon.ResultCodeSynced,
			hookPhaseExpected: synccommon.OperationRunning,
			clientGet:         false,
		},
		{
			// pod1 has no live object in either resource set, but the fake
			// client returns it on "get pods".
			name: "resource created after task sync started",
			resourcesStart: groupResources(ReconciliationResult{
				Live:   []*unstructured.Unstructured{nil, pod2},
				Target: []*unstructured.Unstructured{pod1, pod2},
			}),
			resourcesChange: groupResources(ReconciliationResult{
				Live:   []*unstructured.Unstructured{nil, pod2},
				Target: []*unstructured.Unstructured{pod1, pod2},
			}),
			statusExpected:    synccommon.ResultCodeSynced,
			hookPhaseExpected: synccommon.OperationRunning,
			clientGet:         true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			syncCtx := newTestSyncCtx(nil, WithResourceModificationChecker(true, diffResultList()))
			syncCtx.resources = tt.resourcesStart

			fakeDynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
			if tt.clientGet {
				fakeDynamicClient.PrependReactor("get", "pods", func(action testcore.Action) (bool, runtime.Object, error) {
					return true, pod1, nil
				})
			}
			syncCtx.dynamicIf = fakeDynamicClient

			// First pass: only pod-1 is expected in the results, still running.
			syncCtx.Sync()
			phase, _, resources := syncCtx.GetState()
			// Stop this subtest if the length check fails: indexing resources[0]
			// below would otherwise panic and hide the assertion failure.
			if !assert.Len(t, resources, 1) {
				return
			}
			assert.Equal(t, "pod-1", resources[0].ResourceKey.Name)
			assert.Equal(t, synccommon.OperationRunning, phase)
			assert.Equal(t, synccommon.ResultCodeSynced, resources[0].Status)
			assert.Equal(t, synccommon.OperationRunning, resources[0].HookPhase)

			// Second pass: swap in the changed resource set and sync again.
			syncCtx.resources = tt.resourcesChange
			syncCtx.Sync()
			phase, _, resources = syncCtx.GetState()
			assert.Equal(t, synccommon.OperationRunning, phase)
			// Same guard as above before indexing into resources.
			if !assert.Len(t, resources, 1) {
				return
			}
			assert.Equal(t, "pod-1", resources[0].ResourceKey.Name)
			assert.Equal(t, tt.statusExpected, resources[0].Status)
			assert.Equal(t, tt.hookPhaseExpected, resources[0].HookPhase)
		})
	}
}
func TestSyncDeleteSuccessfully(t *testing.T) {
syncCtx := newTestSyncCtx(nil, WithOperationSettings(false, true, false, false))
svc := testingutils.NewService()
@ -514,6 +621,12 @@ func TestSync_ApplyOutOfSyncOnly(t *testing.T) {
Target: []*unstructured.Unstructured{pod1, nil, pod3},
})
fakeDynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
fakeDynamicClient.PrependReactor("get", "pods", func(action testcore.Action) (bool, runtime.Object, error) {
return true, pod1, nil
})
syncCtx.dynamicIf = fakeDynamicClient
syncCtx.Sync()
phase, _, resources := syncCtx.GetState()
assert.Equal(t, synccommon.OperationRunning, phase)
@ -1930,6 +2043,9 @@ func TestSync_SyncWaveHook(t *testing.T) {
})
syncCtx.hooks = []*unstructured.Unstructured{pod3}
fakeDynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
syncCtx.dynamicIf = fakeDynamicClient
called := false
syncCtx.syncWaveHook = func(phase synccommon.SyncPhase, wave int, final bool) error {
called = true