diff --git a/core/gallery/backends.go b/core/gallery/backends.go index ee3ca906d..6bf8c5d14 100644 --- a/core/gallery/backends.go +++ b/core/gallery/backends.go @@ -110,7 +110,13 @@ func InstallBackendFromGallery(ctx context.Context, galleries []config.Gallery, if err != nil { return err } - if backends.Exists(name) { + // Only short-circuit if the install is *actually usable*. An orphaned + // meta entry whose concrete was removed still shows up in + // ListSystemBackends with a RunFile pointing at a path that no longer + // exists; returning early there leaves the caller with a broken + // alias and the worker fails with "backend not found after install + // attempt" on every retry. Re-install in that case. + if existing, ok := backends.Get(name); ok && isBackendRunnable(existing) { return nil } } @@ -375,17 +381,44 @@ func DeleteBackendFromSystem(systemState *system.SystemState, name string) error } if metadata != nil && metadata.MetaBackendFor != "" { - metaBackendDirectory := filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor) - xlog.Debug("Deleting meta backend", "backendDirectory", metaBackendDirectory) - if _, err := os.Stat(metaBackendDirectory); os.IsNotExist(err) { - return fmt.Errorf("meta backend %q not found", metadata.MetaBackendFor) + concreteDirectory := filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor) + xlog.Debug("Deleting concrete backend referenced by meta", "concreteDirectory", concreteDirectory) + // If the concrete the meta points to is already gone (earlier delete, + // partial install, or manual cleanup), keep going and remove the + // orphaned meta dir. Previously we returned an error here, which made + // the orphaned meta impossible to uninstall from the UI — the delete + // kept failing and every subsequent install short-circuited because + // the stale meta metadata made ListSystemBackends.Exists(name) true. + if _, statErr := os.Stat(concreteDirectory); statErr == nil { + os.RemoveAll(concreteDirectory) + } else if os.IsNotExist(statErr) { + xlog.Warn("Concrete backend referenced by meta not found — removing orphaned meta only", + "meta", name, "concrete", metadata.MetaBackendFor) + } else { + return statErr } - os.RemoveAll(metaBackendDirectory) } return os.RemoveAll(backendDirectory) } +// isBackendRunnable reports whether the given backend entry can actually be +// invoked. A meta backend is runnable only if its concrete's run.sh still +// exists on disk; concrete backends are considered runnable as long as their +// RunFile is set (ListSystemBackends only emits them when the runfile is +// present). Used to guard the "already installed" short-circuit so an +// orphaned meta pointing at a missing concrete triggers a real reinstall +// rather than being silently skipped. +func isBackendRunnable(b SystemBackend) bool { + if b.RunFile == "" { + return false + } + if fi, err := os.Stat(b.RunFile); err != nil || fi.IsDir() { + return false + } + return true +} + type SystemBackend struct { Name string RunFile string diff --git a/core/gallery/backends_test.go b/core/gallery/backends_test.go index 626cbf1da..e5969c1f8 100644 --- a/core/gallery/backends_test.go +++ b/core/gallery/backends_test.go @@ -952,6 +952,58 @@ var _ = Describe("Gallery Backends", func() { err = DeleteBackendFromSystem(systemState, "non-existent") Expect(err).To(HaveOccurred()) }) + + It("removes an orphaned meta backend whose concrete is missing", func() { + // Real scenario from the dev cluster: the concrete got wiped + // (partial install, manual cleanup, previous crash) but the meta + // directory + metadata.json still points at it. The old code + // errored with "meta backend X not found" and left the orphan in + // place, making the backend impossible to uninstall. + metaName := "meta-backend" + concreteName := "concrete-backend-that-vanished" + metaPath := filepath.Join(tempDir, metaName) + Expect(os.MkdirAll(metaPath, 0750)).To(Succeed()) + + meta := BackendMetadata{Name: metaName, MetaBackendFor: concreteName} + data, err := json.MarshalIndent(meta, "", " ") + Expect(err).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(metaPath, "metadata.json"), data, 0644)).To(Succeed()) + + // Concrete directory intentionally absent. + systemState, err := system.GetSystemState(system.WithBackendPath(tempDir)) + Expect(err).NotTo(HaveOccurred()) + + Expect(DeleteBackendFromSystem(systemState, metaName)).To(Succeed()) + Expect(metaPath).NotTo(BeADirectory()) + }) + }) + + Describe("InstallBackendFromGallery — orphaned meta reinstall", func() { + It("re-runs install when the meta's concrete is missing", func() { + // Seed state: meta dir exists with metadata pointing at a + // concrete that was removed from disk. ListSystemBackends still + // surfaces the meta via its metadata.Name → the old short-circuit + // at `if backends.Exists(name) { return nil }` returned silently, + // leaving the worker's findBackend() with a dead alias forever. + // The fix: require the backend to be runnable before we skip. + metaName := "meta-orphan" + concreteName := "concrete-gone" + metaPath := filepath.Join(tempDir, metaName) + Expect(os.MkdirAll(metaPath, 0750)).To(Succeed()) + meta := BackendMetadata{Name: metaName, MetaBackendFor: concreteName} + data, err := json.MarshalIndent(meta, "", " ") + Expect(err).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(metaPath, "metadata.json"), data, 0644)).To(Succeed()) + + systemState, err := system.GetSystemState(system.WithBackendPath(tempDir)) + Expect(err).NotTo(HaveOccurred()) + + listed, err := ListSystemBackends(systemState) + Expect(err).NotTo(HaveOccurred()) + b, ok := listed.Get(metaName) + Expect(ok).To(BeTrue()) + Expect(isBackendRunnable(b)).To(BeFalse()) // concrete run.sh absent + }) }) Describe("ListSystemBackends", func() {