LocalAI/core/application/startup.go

773 lines
30 KiB
Go
Raw Normal View History

package application
import (
"crypto/rand"
"encoding/hex"
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
feat: Add backend gallery (#5607) * feat: Add backend gallery This PR add support to manage backends as similar to models. There is now available a backend gallery which can be used to install and remove extra backends. The backend gallery can be configured similarly as a model gallery, and API calls allows to install and remove new backends in runtime, and as well during the startup phase of LocalAI. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add backends docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * wip: Backend Dockerfile for python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: drop extras images, build python backends separately Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup on all backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * test CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Tweaks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop old backends leftovers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move dockerfile upper Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix proto Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Feature dropped for consistency - we prefer model galleries Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add missing packages in the build image Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * exllama is ponly available on cublas Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * pin torch on chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups to index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI * Install accellerators deps Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add target arch * Add cuda minor version Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use self-hosted runners Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * ci: use quay for test images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups for vllm and chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Small fixups on CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chatterbox is only available for nvidia Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify CI builds Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt test, use qwen3 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(model gallery): add jina-reranker-v1-tiny-en-gguf Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use reranker from llama.cpp in AIO images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Limit concurrent jobs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 12:56:52 +00:00
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/auth"
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/core/services/jobs"
"github.com/mudler/LocalAI/core/services/nodes"
"github.com/mudler/LocalAI/core/services/storage"
"github.com/mudler/LocalAI/pkg/vram"
coreStartup "github.com/mudler/LocalAI/core/startup"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
"github.com/mudler/LocalAI/pkg/sanitize"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/mudler/xlog"
)
func New(opts ...config.AppOption) (*Application, error) {
options := config.NewApplicationConfig(opts...)
// Store a copy of the startup config (from env vars, before file loading)
// This is used to determine if settings came from env vars vs file
startupConfigCopy := *options
application := newApplication(options)
application.startupConfig = &startupConfigCopy
xlog.Info("Starting LocalAI", "threads", options.Threads, "modelsPath", options.SystemState.Model.ModelsPath)
xlog.Info("LocalAI version", "version", internal.PrintableVersion())
if err := application.start(); err != nil {
return nil, err
}
caps, err := xsysinfo.CPUCapabilities()
if err == nil {
xlog.Debug("CPU capabilities", "capabilities", caps)
}
gpus, err := xsysinfo.GPUs()
if err == nil {
xlog.Debug("GPU count", "count", len(gpus))
for _, gpu := range gpus {
xlog.Debug("GPU", "gpu", gpu.String())
}
}
// Make sure directories exists
if options.SystemState.Model.ModelsPath == "" {
return nil, fmt.Errorf("models path cannot be empty")
}
err = os.MkdirAll(options.SystemState.Model.ModelsPath, 0750)
if err != nil {
return nil, fmt.Errorf("unable to create ModelPath: %q", err)
}
if options.GeneratedContentDir != "" {
err := os.MkdirAll(options.GeneratedContentDir, 0750)
if err != nil {
return nil, fmt.Errorf("unable to create ImageDir: %q", err)
}
}
if options.UploadDir != "" {
err := os.MkdirAll(options.UploadDir, 0750)
if err != nil {
return nil, fmt.Errorf("unable to create UploadDir: %q", err)
}
}
// Create and migrate data directory
if options.DataPath != "" {
if err := os.MkdirAll(options.DataPath, 0750); err != nil {
return nil, fmt.Errorf("unable to create DataPath: %q", err)
}
// Migrate data from DynamicConfigsDir to DataPath if needed
if options.DynamicConfigsDir != "" && options.DataPath != options.DynamicConfigsDir {
migrateDataFiles(options.DynamicConfigsDir, options.DataPath)
}
}
// Initialize auth database if auth is enabled
if options.Auth.Enabled {
// Auto-generate HMAC secret if not provided
if options.Auth.APIKeyHMACSecret == "" {
secretFile := filepath.Join(options.DataPath, ".hmac_secret")
secret, err := loadOrGenerateHMACSecret(secretFile)
if err != nil {
return nil, fmt.Errorf("failed to initialize HMAC secret: %w", err)
}
options.Auth.APIKeyHMACSecret = secret
}
authDB, err := auth.InitDB(options.Auth.DatabaseURL)
if err != nil {
return nil, fmt.Errorf("failed to initialize auth database: %w", err)
}
application.authDB = authDB
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
xlog.Info("Auth enabled", "database", sanitize.URL(options.Auth.DatabaseURL))
// Start session and expired API key cleanup goroutine
go func() {
ticker := time.NewTicker(1 * time.Hour)
defer ticker.Stop()
for {
select {
case <-options.Context.Done():
return
case <-ticker.C:
if err := auth.CleanExpiredSessions(authDB); err != nil {
xlog.Error("failed to clean expired sessions", "error", err)
}
if err := auth.CleanExpiredAPIKeys(authDB); err != nil {
xlog.Error("failed to clean expired API keys", "error", err)
}
}
}
}()
}
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
// Wire JobStore for DB-backed task/job persistence whenever auth DB is available.
// This ensures tasks and jobs survive restarts in both single-node and distributed modes.
if application.authDB != nil && application.agentJobService != nil {
dbJobStore, err := jobs.NewJobStore(application.authDB)
if err != nil {
xlog.Error("Failed to create job store for auth DB", "error", err)
} else {
application.agentJobService.SetDistributedJobStore(dbJobStore)
}
}
// Initialize distributed mode services (NATS, object storage, node registry)
feat(concurrency-groups): per-model exclusive groups for backend loading (#9662) * feat(concurrency-groups): per-model exclusive groups for backend loading Adds `concurrency_groups: [...]` to model YAML configs. Two models that share a group cannot be loaded concurrently on the same node — loading one evicts the others, reusing the existing pinned/busy/retry policy from LRU eviction. Layered design: - Watchdog (pkg/model): per-node correctness floor — on every Load(), evict any loaded model that shares a group with the requested one. Pinned skips surface NeedMore so the loader retries (and ultimately logs a clear warning), instead of silently allowing the rule to be violated. - Distributed scheduler (core/services/nodes): soft anti-affinity hint — scheduleNewModel prefers nodes that don't already host a same-group model, falling back to eviction only if every candidate has a conflict. Composes with NodeSelector at the same point in the candidate pipeline. Per-node, not cluster-wide: VRAM is a node-local resource, and two heavy models running on different nodes is fine. The ConfigLoader is wired into SmartRouter via a small ConcurrencyConflictResolver interface so the nodes package keeps a narrow surface on core/config. Refactors the inner LRU eviction body into a shared collectEvictionsLocked helper and the loader retry loop into retryEnforce(fn, maxRetries, interval), so both LRU and group enforcement share busy/pinned/retry semantics. Closes #9659. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(watchdog): sync pinned + concurrency_groups at startup The startup-time watchdog setup lives in initializeWatchdog (startup.go), not in startWatchdog (watchdog.go). The latter is only invoked from the runtime-settings RestartWatchdog path. As a result, neither SyncPinnedModelsToWatchdog nor SyncModelGroupsToWatchdog ran at boot, so `pinned: true` and `concurrency_groups: [...]` only became effective after a settings-driven watchdog restart. Fix by adding both sync calls to initializeWatchdog. Confirmed end-to-end: loading model A in group "heavy", then C with no group (coexists), then B in group "heavy" now correctly evicts A and leaves [B, C]. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(test): satisfy errcheck on new os.Remove in concurrency_groups spec CI lint runs new-from-merge-base, so the existing pre-existing `defer os.Remove(tmp.Name())` lines are baseline-grandfathered but the one introduced by the concurrency_groups YAML round-trip test is held to errcheck. Wrap the remove in a closure that discards the error. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-05 06:42:50 +00:00
distSvc, err := initDistributed(options, application.authDB, application.ModelConfigLoader())
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
if err != nil {
return nil, fmt.Errorf("distributed mode initialization failed: %w", err)
}
if distSvc != nil {
application.distributed = distSvc
// Wire remote model unloader so ShutdownModel works for remote nodes
// Uses NATS to tell serve-backend nodes to Free + kill their backend process
application.modelLoader.SetRemoteUnloader(distSvc.Unloader)
// Wire ModelRouter so grpcModel() delegates to SmartRouter in distributed mode
application.modelLoader.SetModelRouter(distSvc.ModelAdapter.AsModelRouter())
// Wire DistributedModelStore so shutdown/list/watchdog can find remote models
distStore := nodes.NewDistributedModelStore(
model.NewInMemoryModelStore(),
distSvc.Registry,
)
application.modelLoader.SetModelStore(distStore)
// Start health monitor
distSvc.Health.Start(options.Context)
// Start replica reconciler for auto-scaling model replicas
if distSvc.Reconciler != nil {
go distSvc.Reconciler.Run(options.Context)
}
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
// In distributed mode, MCP CI jobs are executed by agent workers (not the frontend)
// because the frontend can't create MCP sessions (e.g., stdio servers using docker).
// The dispatcher still subscribes to jobs.new for persistence (result/progress subs)
// but does NOT set a workerFn — agent workers consume jobs from the same NATS queue.
// Wire model config loader so job events include model config for agent workers
distSvc.Dispatcher.SetModelConfigLoader(application.backendLoader)
// Start job dispatcher — abort startup if it fails, as jobs would be accepted but never dispatched
if err := distSvc.Dispatcher.Start(options.Context); err != nil {
return nil, fmt.Errorf("starting job dispatcher: %w", err)
}
// Start ephemeral file cleanup
storage.StartEphemeralCleanup(options.Context, distSvc.FileMgr, 0, 0)
// Wire distributed backends into AgentJobService (before Start)
if application.agentJobService != nil {
application.agentJobService.SetDistributedBackends(distSvc.Dispatcher)
application.agentJobService.SetDistributedJobStore(distSvc.JobStore)
}
// Wire skill store into AgentPoolService (wired at pool start time via closure)
// The actual wiring happens in StartAgentPool since the pool doesn't exist yet.
// Wire NATS and gallery store into GalleryService for cross-instance progress/cancel
if application.galleryService != nil {
application.galleryService.SetNATSClient(distSvc.Nats)
if distSvc.DistStores != nil && distSvc.DistStores.Gallery != nil {
// Clean up stale in-progress operations from previous crashed instances
if err := distSvc.DistStores.Gallery.CleanStale(30 * time.Minute); err != nil {
xlog.Warn("Failed to clean stale gallery operations", "error", err)
}
application.galleryService.SetGalleryStore(distSvc.DistStores.Gallery)
}
// Wire distributed model/backend managers so delete propagates to workers
application.galleryService.SetModelManager(
nodes.NewDistributedModelManager(options, application.modelLoader, distSvc.Unloader),
)
application.galleryService.SetBackendManager(
nodes.NewDistributedBackendManager(options, application.modelLoader, distSvc.Unloader, distSvc.Registry),
)
}
}
// Start AgentJobService (after distributed wiring so it knows whether to use local or NATS)
if application.agentJobService != nil {
if err := application.agentJobService.Start(options.Context); err != nil {
return nil, fmt.Errorf("starting agent job service: %w", err)
}
}
if err := coreStartup.InstallModels(options.Context, application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
xlog.Error("error installing models", "error", err)
}
for _, backend := range options.ExternalBackends {
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
if err := galleryop.InstallExternalBackend(options.Context, options.BackendGalleries, options.SystemState, application.ModelLoader(), nil, backend, "", ""); err != nil {
xlog.Error("error installing external backend", "error", err)
}
feat: Add backend gallery (#5607) * feat: Add backend gallery This PR add support to manage backends as similar to models. There is now available a backend gallery which can be used to install and remove extra backends. The backend gallery can be configured similarly as a model gallery, and API calls allows to install and remove new backends in runtime, and as well during the startup phase of LocalAI. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add backends docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * wip: Backend Dockerfile for python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: drop extras images, build python backends separately Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup on all backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * test CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Tweaks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop old backends leftovers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move dockerfile upper Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix proto Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Feature dropped for consistency - we prefer model galleries Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add missing packages in the build image Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * exllama is ponly available on cublas Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * pin torch on chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups to index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI * Install accellerators deps Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add target arch * Add cuda minor version Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use self-hosted runners Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * ci: use quay for test images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups for vllm and chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Small fixups on CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chatterbox is only available for nvidia Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify CI builds Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt test, use qwen3 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(model gallery): add jina-reranker-v1-tiny-en-gguf Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use reranker from llama.cpp in AIO images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Limit concurrent jobs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 12:56:52 +00:00
}
configLoaderOpts := options.ToConfigLoaderOptions()
if err := application.ModelConfigLoader().LoadModelConfigsFromPath(options.SystemState.Model.ModelsPath, configLoaderOpts...); err != nil {
xlog.Error("error loading config files", "error", err)
}
if err := gallery.RegisterBackends(options.SystemState, application.ModelLoader()); err != nil {
xlog.Error("error registering external backends", "error", err)
feat: Add backend gallery (#5607) * feat: Add backend gallery This PR add support to manage backends as similar to models. There is now available a backend gallery which can be used to install and remove extra backends. The backend gallery can be configured similarly as a model gallery, and API calls allows to install and remove new backends in runtime, and as well during the startup phase of LocalAI. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add backends docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * wip: Backend Dockerfile for python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: drop extras images, build python backends separately Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup on all backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * test CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Tweaks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop old backends leftovers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move dockerfile upper Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix proto Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Feature dropped for consistency - we prefer model galleries Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add missing packages in the build image Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * exllama is ponly available on cublas Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * pin torch on chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups to index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI * Install accellerators deps Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add target arch * Add cuda minor version Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use self-hosted runners Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * ci: use quay for test images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups for vllm and chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Small fixups on CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chatterbox is only available for nvidia Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify CI builds Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt test, use qwen3 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(model gallery): add jina-reranker-v1-tiny-en-gguf Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use reranker from llama.cpp in AIO images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Limit concurrent jobs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 12:56:52 +00:00
}
feat: backend versioning, upgrade detection and auto-upgrade (#9315) * feat: add backend versioning data model foundation Add Version, URI, and Digest fields to BackendMetadata for tracking installed backend versions and enabling upgrade detection. Add Version field to GalleryBackend. Add UpgradeAvailable/AvailableVersion fields to SystemBackend. Implement GetImageDigest() for lightweight OCI digest lookups via remote.Head. Record version, URI, and digest at install time in InstallBackend() and propagate version through meta backends. * feat: add backend upgrade detection and execution logic Add CheckBackendUpgrades() to compare installed backend versions/digests against gallery entries, and UpgradeBackend() to perform atomic upgrades with backup-based rollback on failure. Includes Agent A's data model changes (Version/URI/Digest fields, GetImageDigest). * feat: add AutoUpgradeBackends config and runtime settings Add configuration and runtime settings for backend auto-upgrade: - RuntimeSettings field for dynamic config via API/JSON - ApplicationConfig field, option func, and roundtrip conversion - CLI flag with LOCALAI_AUTO_UPGRADE_BACKENDS env var - Config file watcher support for runtime_settings.json - Tests for ToRuntimeSettings, ApplyRuntimeSettings, and roundtrip * feat(ui): add backend version display and upgrade support - Add upgrade check/trigger API endpoints to config and api module - Backends page: version badge, upgrade indicator, upgrade button - Manage page: version in metadata, context-aware upgrade/reinstall button - Settings page: auto-upgrade backends toggle * feat: add upgrade checker service, API endpoints, and CLI command - UpgradeChecker background service: checks every 6h, auto-upgrades when enabled - API endpoints: GET /backends/upgrades, POST /backends/upgrades/check, POST /backends/upgrade/:name - CLI: `localai backends upgrade` command, version display in `backends list` - BackendManager interface: add UpgradeBackend and CheckUpgrades methods - Wire upgrade op through GalleryService backend handler - Distributed mode: fan-out upgrade to worker nodes via NATS * fix: use advisory lock for upgrade checker in distributed mode In distributed mode with multiple frontend instances, use PostgreSQL advisory lock (KeyBackendUpgradeCheck) so only one instance runs periodic upgrade checks and auto-upgrades. Prevents duplicate upgrade operations across replicas. Standalone mode is unchanged (simple ticker loop). * test: add e2e tests for backend upgrade API - Test GET /api/backends/upgrades returns 200 (even with no upgrade checker) - Test POST /api/backends/upgrade/:name accepts request and returns job ID - Test full upgrade flow: trigger upgrade via API, wait for job completion, verify run.sh updated to v2 and metadata.json has version 2.0.0 - Test POST /api/backends/upgrades/check returns 200 - Fix nil check for applicationInstance in upgrade API routes
2026-04-11 20:31:15 +00:00
// Start background upgrade checker for backends.
// In distributed mode, uses PostgreSQL advisory lock so only one frontend
// instance runs periodic checks (avoids duplicate upgrades across replicas).
if len(options.BackendGalleries) > 0 {
feat(distributed): sync state with frontends, better backend management reporting (#9426) * fix(distributed): detect backend upgrades across worker nodes Before this change `DistributedBackendManager.CheckUpgrades` delegated to the local manager, which read backends from the frontend filesystem. In distributed deployments the frontend has no backends installed locally — they live on workers — so the upgrade-detection loop never ran and the UI silently never surfaced upgrades even when the gallery advertised newer versions or digests. Worker-side: NATS backend.list reply now carries Version, URI and Digest for each installed backend (read from metadata.json). Frontend-side: DistributedBackendManager.ListBackends aggregates per-node refs (name, status, version, digest) instead of deduping, and CheckUpgrades feeds that aggregation into gallery.CheckUpgradesAgainst — a new entrypoint factored out of CheckBackendUpgrades so both paths share the same core logic. Cluster drift policy: when per-node version/digest tuples disagree, the backend is flagged upgradeable regardless of whether any single node matches the gallery, and UpgradeInfo.NodeDrift enumerates the outliers so operators can see *why* it is out of sync. The next upgrade-all realigns the cluster. Tests cover: drift detection, unanimous-match (no upgrade), and the empty-installed-version path that the old distributed code silently missed. * feat(ui): surface backend upgrades in the System page The System page (Manage.jsx) only showed updates as a tiny inline arrow, so operators routinely missed them. Port the Backend Gallery's upgrade UX so System speaks the same visual language: - Yellow banner at the top of the Backends tab when upgrades are pending, with an "Upgrade all" button (serial fan-out, matches the gallery) and a "Updates only" filter toggle. - Warning pill (↑ N) next to the tab label so the count is glanceable even when the banner is scrolled out of view. - Per-row labeled "Upgrade to vX.Y" button (replaces the icon-only button that silently flipped semantics between Reinstall and Upgrade), plus an "Update available" badge in the new Version column. - New columns: Version (with upgrade + drift chips), Nodes (per-node attribution badges for distributed mode, degrading to a compact "on N nodes · M offline" chip above three nodes), Installed (relative time). - System backends render a "Protected" chip instead of a bare "—" so rows still align and the reason is obvious. - Delete uses the softer btn-danger-ghost so rows don't scream red; the ConfirmDialog still owns the "are you sure". The upgrade checker also needed the same per-worker fix as the previous commit: NewUpgradeChecker now takes a BackendManager getter so its periodic runs call the distributed CheckUpgrades (which asks workers) instead of the empty frontend filesystem. Without this the /api/backends/ upgrades endpoint stayed empty in distributed mode even with the protocol change in place. New CSS primitives — .upgrade-banner, .tab-pill, .badge-row, .cell-stack, .cell-mono, .cell-muted, .row-actions, .btn-danger-ghost — all live in App.css so other pages can adopt them without duplicating styles. * feat(ui): polish the Nodes page so it reads like a product The Nodes page was the biggest visual liability in distributed mode. Rework the main dashboard surfaces in place without changing behavior: StatCards: uniform height (96px min), left accent bar colored by the metric's semantic (success/warning/error/primary), icon lives in a 36x36 soft-tinted chip top-right, value is left-aligned and large. Grid auto-fills so the row doesn't collapse on narrow viewports. This replaces the previous thin-bordered boxes with inconsistent heights. Table rows: expandable rows now show a chevron cue on the left (rotates on expand) so users know rows open. Status cell became a dedicated chip with an LED-style halo dot instead of a bare bullet. Action buttons gained labels — "Approve", "Resume", "Drain" — so the icons aren't doing all the semantic work; the destructive remove action uses the softer btn-danger-ghost variant so rows don't scream red, with the ConfirmDialog still owning the real "are you sure". Applied cell-mono/cell-muted utility classes so label chips and addresses share one spacing/font grammar instead of re-declaring inline styles everywhere. Expanded drawer: empty states for Loaded Models and Installed Backends now render as a proper drawer-empty card (dashed border, icon, one-line hint) instead of a plain muted string that read like broken formatting. Tabs: three inline-styled buttons became the shared .tab class so they inherit focus ring, hover state, and the rest of the design system — matches the System page. "Add more workers" toggle turned into a .nodes-add-worker dashed-border button labelled "Register a new worker" (action voice) instead of a chevron + muted link that operators kept mistaking for broken text. New shared CSS primitives carry over to other pages: .stat-grid + .stat-card, .row-chevron, .node-status, .drawer-empty, .nodes-add-worker. * feat(distributed): durable backend fan-out + state reconciliation Two connected problems handled together: 1) Backend delete/install/upgrade used to silently skip non-healthy nodes, so a delete during an outage left a zombie on the offline node once it returned. The fan-out now records intent in a new pending_backend_ops table before attempting the NATS round-trip. Currently-healthy nodes get an immediate attempt; everyone else is queued. Unique index on (node_id, backend, op) means reissuing the same operation refreshes next_retry_at instead of stacking duplicates. 2) Loaded-model state could drift from reality: a worker OOM'd, got killed, or restarted a backend process would leave a node_models row claiming the model was still loaded, feeding ghost entries into the /api/nodes/models listing and the router's scheduling decisions. The existing ReplicaReconciler gains two new passes that run under a fresh KeyStateReconciler advisory lock (non-blocking, so one wedged frontend doesn't freeze the cluster): - drainPendingBackendOps: retries queued ops whose next_retry_at has passed on currently-healthy nodes. Success deletes the row; failure bumps attempts and pushes next_retry_at out with exponential backoff (30s → 15m cap). ErrNoResponders also marks the node unhealthy. - probeLoadedModels: gRPC-HealthChecks addresses the DB thinks are loaded but hasn't seen touched in the last probeStaleAfter (2m). Unreachable addresses are removed from the registry. A pluggable ModelProber lets tests substitute a fake without standing up gRPC. DistributedBackendManager exposes DeleteBackendDetailed so the HTTP handler can surface per-node outcomes ("2 succeeded, 1 queued") to the UI in a follow-up commit; the existing DeleteBackend still returns error-only for callers that don't care about node breakdown. Multi-frontend safety: the state pass uses advisorylock.TryWithLockCtx on a new key so N frontends coordinate — the same pattern the health monitor and replica reconciler already rely on. Single-node mode runs both passes inline (adapter is nil, state drain is a no-op). Tests cover the upsert semantics, backoff math, the probe removing an unreachable model but keeping a reachable one, and filtering by probeStaleAfter. * feat(ui): show cluster distribution of models in the System page When a frontend restarted in distributed mode, models that workers had already loaded weren't visible until the operator clicked into each node manually — the /api/models/capabilities endpoint only knew about configs on the frontend's filesystem, not the registry-backed truth. /api/models/capabilities now joins in ListAllLoadedModels() when the registry is active, returning loaded_on[] with node id/name/state/status for each model. Models that live in the registry but lack a local config (the actual ghosts, not recovered from the frontend's file cache) still surface with source="registry-only" so operators can see and persist them; without that emission they'd be invisible to this frontend. Manage → Models replaces the old Running/Idle pill with a distribution cell that lists the first three nodes the model is loaded on as chips colored by state (green loaded, blue loading, amber anything else). On wider clusters the remaining count collapses into a +N chip with a title-attribute breakdown. Disabled / single-node behavior unchanged. Adopted models get an extra "Adopted" ghost-icon chip with hover copy explaining what it means and how to make it permanent. Distributed mode also enables a 10s auto-refresh and a "Last synced Xs ago" indicator next to the Update button so ghost rows drop off within one reconcile tick after their owning process dies. Non-distributed mode is untouched — no polling, no cell-stack, same old Running/Idle. * feat(ui): NodeDistributionChip — shared per-node attribution component Large clusters were going to break the Manage → Backends Nodes column: the old inline logic rendered every node as a badge and would shred the layout at >10 workers, plus the Manage → Models distribution cell had copy-pasted its own slightly-different version. NodeDistributionChip handles any cluster size with two render modes: - small (≤3 nodes): inline chips of node names, colored by health. - large: a single "on N nodes · M offline · K drift" summary chip; clicking opens a Popover with a per-node table (name, status, version, digest for backends; name, status, state for models). Drift counting mirrors the backend's summarizeNodeDrift so the UI number matches UpgradeInfo.NodeDrift. Digests are truncated to the docker-style 12-char form with the full value preserved in the title. Popover is a new general-purpose primitive: fixed positioning anchored to the trigger, flips above when there's no room below, closes on outside-click or Escape, returns focus to the trigger. Uses .card as its surface so theming is inherited. Also useful for a future labels-editor popup and the user menu. Manage.jsx drops its duplicated inline Nodes-column + loaded_on cell and uses the shared chip with context="backends" / "models" respectively. Delete code removes ~40 lines of ad-hoc logic. * feat(ui): shared FilterBar across the System page tabs The Backends gallery had a nice search + chip + toggle strip; the System page had nothing, so the two surfaces felt like different apps. Lift the pattern into a reusable FilterBar and wire both System tabs through it. New component core/http/react-ui/src/components/FilterBar.jsx renders a search input, a role="tablist" chip row (aria-selected for a11y), and optional toggles / right slot. Chips support an optional `count` which the System page uses to show "User 3", "Updates 1" etc. System Models tab: search by id or backend; chips for All/Running/Idle/Disabled/Pinned plus a conditional Distributed chip in distributed mode. "Last synced" + Update button live in the right slot. System Backends tab: search by name/alias/meta-backend-for; chips for All/User/System/Meta plus conditional Updates / Offline-nodes chips when relevant. The old ad-hoc "Updates only" toggle from the upgrade banner folded into the Updates chip — one source of truth for that filter. Offline chip only appears in distributed mode when at least one backend has an unhealthy node, so the chip row stays quiet on healthy clusters. Filter state persists in URL query params (mq/mf/bq/bf) so deep links and tab switches keep the operator's filter context instead of resetting every time. Also adds an "Adopted" distribution path: when a model in /api/models/capabilities carries source="registry-only" (discovered on a worker but not configured locally), the Models tab shows a ghost chip labelled "Adopted" with hover copy explaining how to persist it — this is what closes the loop on the ghost-model story end-to-end.
2026-04-19 15:55:53 +00:00
// Pass a lazy getter for the backend manager so the checker always
// uses the active one — DistributedBackendManager is swapped in above
// and asks workers for their installed backends, which is what
// upgrade detection needs in distributed mode.
bmFn := func() galleryop.BackendManager { return application.GalleryService().BackendManager() }
uc := NewUpgradeChecker(options, application.ModelLoader(), application.distributedDB(), bmFn)
feat: backend versioning, upgrade detection and auto-upgrade (#9315) * feat: add backend versioning data model foundation Add Version, URI, and Digest fields to BackendMetadata for tracking installed backend versions and enabling upgrade detection. Add Version field to GalleryBackend. Add UpgradeAvailable/AvailableVersion fields to SystemBackend. Implement GetImageDigest() for lightweight OCI digest lookups via remote.Head. Record version, URI, and digest at install time in InstallBackend() and propagate version through meta backends. * feat: add backend upgrade detection and execution logic Add CheckBackendUpgrades() to compare installed backend versions/digests against gallery entries, and UpgradeBackend() to perform atomic upgrades with backup-based rollback on failure. Includes Agent A's data model changes (Version/URI/Digest fields, GetImageDigest). * feat: add AutoUpgradeBackends config and runtime settings Add configuration and runtime settings for backend auto-upgrade: - RuntimeSettings field for dynamic config via API/JSON - ApplicationConfig field, option func, and roundtrip conversion - CLI flag with LOCALAI_AUTO_UPGRADE_BACKENDS env var - Config file watcher support for runtime_settings.json - Tests for ToRuntimeSettings, ApplyRuntimeSettings, and roundtrip * feat(ui): add backend version display and upgrade support - Add upgrade check/trigger API endpoints to config and api module - Backends page: version badge, upgrade indicator, upgrade button - Manage page: version in metadata, context-aware upgrade/reinstall button - Settings page: auto-upgrade backends toggle * feat: add upgrade checker service, API endpoints, and CLI command - UpgradeChecker background service: checks every 6h, auto-upgrades when enabled - API endpoints: GET /backends/upgrades, POST /backends/upgrades/check, POST /backends/upgrade/:name - CLI: `localai backends upgrade` command, version display in `backends list` - BackendManager interface: add UpgradeBackend and CheckUpgrades methods - Wire upgrade op through GalleryService backend handler - Distributed mode: fan-out upgrade to worker nodes via NATS * fix: use advisory lock for upgrade checker in distributed mode In distributed mode with multiple frontend instances, use PostgreSQL advisory lock (KeyBackendUpgradeCheck) so only one instance runs periodic upgrade checks and auto-upgrades. Prevents duplicate upgrade operations across replicas. Standalone mode is unchanged (simple ticker loop). * test: add e2e tests for backend upgrade API - Test GET /api/backends/upgrades returns 200 (even with no upgrade checker) - Test POST /api/backends/upgrade/:name accepts request and returns job ID - Test full upgrade flow: trigger upgrade via API, wait for job completion, verify run.sh updated to v2 and metadata.json has version 2.0.0 - Test POST /api/backends/upgrades/check returns 200 - Fix nil check for applicationInstance in upgrade API routes
2026-04-11 20:31:15 +00:00
application.upgradeChecker = uc
feat: add biometrics UI (#9524) * feat(react-ui): add Face & Voice Recognition pages Expose the face and voice biometrics endpoints (/v1/face/*, /v1/voice/*) through the React UI. Each page has four tabs driving the six endpoints per modality: Analyze (demographics with bounding boxes / waveform segments), Compare (verify with a match gauge and live threshold slider), Enrollment (register / identify / forget with a top-K matches view), Embedding (raw vector inspector with sparkline + copy). MediaInput supports file upload plus live capture: webcam snap-to-canvas for face, MediaRecorder -> AudioContext -> 16-bit PCM mono WAV transcode for voice (libsndfile on the backend only handles WAV/FLAC/OGG natively). Sidebar gets a new Biometrics section feature-gated on face_recognition / voice_recognition; routes are wrapped in <RequireFeature>. No new dependencies -- Font Awesome icons picked from the Free set. Assisted-by: Claude:Opus 4.7 * fix(localai): accept data URI prefixes with codec/charset params Browser MediaRecorder produces data URIs like data:audio/webm;codecs=opus;base64,... so the pre-';base64,' section can carry multiple parameter segments. The `^data:([^;]+);base64,` regex in pkg/utils/base64.go and core/http/endpoints/localai/audio.go only matched exactly one segment, so recordings straight from the React UI's live-capture tab failed the strip and then tripped the base64 decoder on the leading 'data:' literal, surfacing as "invalid audio base64: illegal base64 data at input byte 4" Widened both regexes to `^data:[^,]+?;base64,` so any number of ';param=value' segments between the mime type and ';base64,' are tolerated. Added a regression test covering the MediaRecorder shape. Assisted-by: Claude:Opus 4.7 * fix(insightface): scope pack ONNX loading to known manifests LocalAI's gallery extracts buffalo_* zips flat into the models directory, which inevitably mixes with ONNX files from other backends (opencv face engine, MiniFASNet antispoof, WeSpeaker voice embedding) and older buffalo pack installs. Feeding those foreign files into insightface's model_zoo.get_model() blows up inside the router -- it assumes a 4-D NCHW input and indexes `input_shape[2]` on tensors that aren't shaped like a face model, raising IndexError mid-load and leaving the backend unusable. The router's dispatch isn't amenable to per-file try/except alone (first-file-wins picks det_10g.onnx from buffalo_l even when the user asked for buffalo_sc -- alphabetical order happens to favour the wrong pack). Instead, ship an explicit manifest of the upstream v0.7 pack contents and scope the glob to that when the requested pack is known. The manifest is small and stable; future packs can be added alongside or fall through to the tolerance loop, which also swallows any remaining IndexError / ValueError from foreign files with a clear `[insightface] skipped` stderr line for diagnostics. Assisted-by: Claude:Opus 4.7 * fix(speaker-recognition): extract FBank features for rank-3 ONNX encoders Pre-exported speaker-encoder ONNX graphs come in two shapes: rank-2 [batch, samples] -- some 3D-Speaker exports, take raw waveform directly. rank-3 [batch, frames, n_mels] -- WeSpeaker and most Kaldi- lineage encoders, expect pre-computed Kaldi FBank. OnnxDirectEngine unconditionally fed `audio.reshape(1, -1)` -- correct for rank-2, IndexError-on-input_shape[3] on rank-3, which surfaced to the UI as "Invalid rank for input: feats Got: 2 Expected: 3" Detect the input rank at session init and run Kaldi FBank (80-dim, 25ms/10ms frames, dither=0.0, per-utterance CMN) before the forward pass when rank>=3. All knobs are configurable via backend options for encoders that deviate from defaults. torchaudio.compliance.kaldi is already in the backend's requirements (SpeechBrain pulls torchaudio in), so no new dependency. Assisted-by: Claude:Opus 4.7 * fix(biometrics): isolate face and voice vector stores Face (ArcFace, 512-D) and voice (ECAPA-TDNN 192-D / WeSpeaker 256-D) biometric embeddings were colliding inside a single in-memory local-store instance. Enrolling one after the other failed with "Try to add key with length N when existing length is M" because local-store correctly refuses to mix dimensions in one keyspace. The registries were constructed with `storeName=""`, which in StoreBackend() is just a WithModel() call. But ModelLoader's cache is keyed on `modelID`, not `model` -- so both registries collapsed to the same `modelID=""` slot and reused the same backend process despite looking isolated on paper. Three complementary fixes: 1. application.go -- give each registry a distinct default namespace ("localai-face-biometrics" / "localai-voice-biometrics"). The comment claimed isolation, now it's actually enforced. 2. stores.go -- pass the storeName as both WithModelID and WithModel so the ModelLoader cache key separates namespaces and the loader spawns distinct processes. 3. local-store/store.go -- drop the Load() `opts.Model != ""` guard. It was there to prevent generic model-loading loops from picking up local-store by accident, but that auto-load path is being retired; the guard now just blocks legitimate namespace isolation. opts.Model is treated as a tag; the per-tuple process isolation upstream handles discrimination. Assisted-by: Claude:Opus 4.7 * fix(gallery): stale-file cleanup and upgrade-tmp directory safety Two related robustness fixes for backend install/upgrade: pkg/downloader/uri.go OCI downloads passed through if filepath.Ext(filePath) != "" ... filePath = filepath.Dir(filePath) which was intended to redirect file-shaped download targets into their parent directory for OCI extraction. The heuristic misfires on directory-shaped paths with a dot-suffix -- gallery.UpgradeBackend uses tmpPath = "<backendsPath>/<name>.upgrade-tmp" and Go's filepath.Ext treats ".upgrade-tmp" as an extension. The rewrite landed the extraction at "<backendsPath>/", which then **overwrote the real install** (backends/<name>/) with a flat-layout file and left a stray run.sh at the top level. The tmp dir itself stayed empty, so the validation step that checked "<tmpPath>/run.sh" predictably failed with "upgrade validation failed: run.sh not found in new backend" Every manual upgrade silently corrupted the backends tree this way. Guard the rewrite behind "target isn't already an existing directory" -- InstallBackend / UpgradeBackend both pre-create the target as a directory, so they get the correct behaviour; existing file-path callers with a genuine dot-extension still get the parent redirect. core/gallery/backends.go InstallBackend's MkdirAll returned ENOTDIR when something at the target path was already a file (legacy dev builds dropped golang backend binaries directly at `<backendsPath>/<name>` instead of nesting them under their own subdir). That permanently blocked reinstall and upgrade for anyone carrying that state, since every retry hit the same error. Detect a pre-existing non-directory, warn, and remove it before the MkdirAll so the fresh install can write the correct nested layout with metadata.json + run.sh. Assisted-by: Claude:Opus 4.7 * fix(galleryop): refresh upgrade cache after backend ops UpgradeChecker caches the last upgrade-check result and only refreshes on the 6-hour tick or after an auto-upgrade cycle. Manual upgrades (POST /api/backends/upgrade/:name) go through the async galleryop worker, which completes the upgrade correctly but never tells UpgradeChecker to re-check -- so /api/backends/upgrades continued to list a just-upgraded backend as upgradeable, indistinguishable from a failed upgrade, for up to six hours. Add an optional `OnBackendOpCompleted func()` hook on GalleryService that fires after every successful install / upgrade / delete on the backend channel (async, so a slow callback doesn't stall the queue). startup.go wires it to UpgradeChecker.TriggerCheck after both services exist. Result: the upgrade banner clears within milliseconds of the worker finishing. Assisted-by: Claude:Opus 4.7 * build: prepend GOPATH/bin to PATH for protogen-go install-go-tools runs `go install` for protoc-gen-go and protoc-gen-go-grpc, which writes them into `go env GOPATH`/bin. That directory isn't on every dev's PATH, and protoc resolves its code-gen plugins via PATH, so the immediately-following protoc invocation fails with "protoc-gen-go: program not found" which in turn blocks `make build` and any `make backends/%` target that depends on build. Prepend `go env GOPATH`/bin to PATH for the protoc invocation so the freshly-installed plugins are found without requiring a shell-profile change. Assisted-by: Claude:Opus 4.7 * refactor(ui-api): non-blocking backend upgrade handler with opcache POST /api/backends/upgrade/:name used to send the ManagementOp directly onto the unbuffered BackendGalleryChannel, which blocked the HTTP request whenever the galleryop worker was busy with a prior operation. The op also didn't show up in /api/operations, so the Backends UI couldn't reflect upgrade progress on the affected row. Register the op in opcache immediately, wrap it in a cancellable context, store the cancellation function on the GalleryService, and push onto the channel from a goroutine so the handler returns right away. Response gains a `jobID` field and a `message` string so clients have a consistent handle regardless of whether the op is queued or running. Pairs with the OnBackendOpCompleted hook added in the galleryop commit — together the UI sees the upgrade start, watches progress via /api/operations, and drops the "upgradeable" flag the moment the worker finishes. Assisted-by: Claude:Opus 4.7
2026-04-24 06:50:34 +00:00
// Refresh the upgrade cache the moment a backend op finishes — otherwise
// the UI keeps showing a just-upgraded backend as upgradeable until the
// next 6-hour tick. TriggerCheck is non-blocking.
if gs := application.GalleryService(); gs != nil {
gs.OnBackendOpCompleted = uc.TriggerCheck
}
feat: backend versioning, upgrade detection and auto-upgrade (#9315) * feat: add backend versioning data model foundation Add Version, URI, and Digest fields to BackendMetadata for tracking installed backend versions and enabling upgrade detection. Add Version field to GalleryBackend. Add UpgradeAvailable/AvailableVersion fields to SystemBackend. Implement GetImageDigest() for lightweight OCI digest lookups via remote.Head. Record version, URI, and digest at install time in InstallBackend() and propagate version through meta backends. * feat: add backend upgrade detection and execution logic Add CheckBackendUpgrades() to compare installed backend versions/digests against gallery entries, and UpgradeBackend() to perform atomic upgrades with backup-based rollback on failure. Includes Agent A's data model changes (Version/URI/Digest fields, GetImageDigest). * feat: add AutoUpgradeBackends config and runtime settings Add configuration and runtime settings for backend auto-upgrade: - RuntimeSettings field for dynamic config via API/JSON - ApplicationConfig field, option func, and roundtrip conversion - CLI flag with LOCALAI_AUTO_UPGRADE_BACKENDS env var - Config file watcher support for runtime_settings.json - Tests for ToRuntimeSettings, ApplyRuntimeSettings, and roundtrip * feat(ui): add backend version display and upgrade support - Add upgrade check/trigger API endpoints to config and api module - Backends page: version badge, upgrade indicator, upgrade button - Manage page: version in metadata, context-aware upgrade/reinstall button - Settings page: auto-upgrade backends toggle * feat: add upgrade checker service, API endpoints, and CLI command - UpgradeChecker background service: checks every 6h, auto-upgrades when enabled - API endpoints: GET /backends/upgrades, POST /backends/upgrades/check, POST /backends/upgrade/:name - CLI: `localai backends upgrade` command, version display in `backends list` - BackendManager interface: add UpgradeBackend and CheckUpgrades methods - Wire upgrade op through GalleryService backend handler - Distributed mode: fan-out upgrade to worker nodes via NATS * fix: use advisory lock for upgrade checker in distributed mode In distributed mode with multiple frontend instances, use PostgreSQL advisory lock (KeyBackendUpgradeCheck) so only one instance runs periodic upgrade checks and auto-upgrades. Prevents duplicate upgrade operations across replicas. Standalone mode is unchanged (simple ticker loop). * test: add e2e tests for backend upgrade API - Test GET /api/backends/upgrades returns 200 (even with no upgrade checker) - Test POST /api/backends/upgrade/:name accepts request and returns job ID - Test full upgrade flow: trigger upgrade via API, wait for job completion, verify run.sh updated to v2 and metadata.json has version 2.0.0 - Test POST /api/backends/upgrades/check returns 200 - Fix nil check for applicationInstance in upgrade API routes
2026-04-11 20:31:15 +00:00
go uc.Run(options.Context)
}
// Wire gallery generation counter into VRAM caches so they invalidate
// when gallery data refreshes instead of using a fixed TTL.
vram.SetGalleryGenerationFunc(gallery.GalleryGeneration)
if options.ConfigFile != "" {
if err := application.ModelConfigLoader().LoadMultipleModelConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
xlog.Error("error loading config file", "error", err)
}
}
if err := application.ModelConfigLoader().Preload(options.SystemState.Model.ModelsPath); err != nil {
xlog.Error("error downloading models", "error", err)
}
if options.PreloadJSONModels != "" {
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
if err := galleryop.ApplyGalleryFromString(options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, options.Galleries, options.BackendGalleries, options.PreloadJSONModels); err != nil {
return nil, err
}
}
if options.PreloadModelsFromPath != "" {
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
if err := galleryop.ApplyGalleryFromFile(options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, options.Galleries, options.BackendGalleries, options.PreloadModelsFromPath); err != nil {
return nil, err
}
}
if options.Debug {
for _, v := range application.ModelConfigLoader().GetAllModelsConfigs() {
xlog.Debug("Model", "name", v.Name, "config", v)
}
}
// Load runtime settings from file if DynamicConfigsDir is set
// This applies file settings with env var precedence (env vars take priority)
// Note: startupConfigCopy was already created above, so it has the original env var values
if options.DynamicConfigsDir != "" {
loadRuntimeSettingsFromFile(options)
}
application.ModelLoader().SetBackendLoggingEnabled(options.EnableBackendLogging)
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()
xlog.Debug("Context canceled, shutting down")
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
application.distributed.Shutdown()
err := application.ModelLoader().StopAllGRPC()
if err != nil {
xlog.Error("error while stopping all grpc backends", "error", err)
}
}()
// Initialize watchdog with current settings (after loading from file)
initializeWatchdog(application, options)
if options.LoadToMemory != nil && !options.SingleBackend {
for _, m := range options.LoadToMemory {
cfg, err := application.ModelConfigLoader().LoadModelConfigFileByNameDefaultOptions(m, options)
if err != nil {
return nil, err
}
xlog.Debug("Auto loading model into memory from file", "model", m, "file", cfg.Model)
o := backend.ModelOptions(*cfg, options)
var backendErr error
_, backendErr = application.ModelLoader().Load(o...)
if backendErr != nil {
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-29 22:47:27 +00:00
return nil, backendErr
}
}
}
// Watch the configuration directory
startWatcher(options)
xlog.Info("core/startup process completed!")
return application, nil
}
func startWatcher(options *config.ApplicationConfig) {
if options.DynamicConfigsDir == "" {
// No need to start the watcher if the directory is not set
return
}
if _, err := os.Stat(options.DynamicConfigsDir); err != nil {
if os.IsNotExist(err) {
// We try to create the directory if it does not exist and was specified
if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil {
xlog.Error("failed creating DynamicConfigsDir", "error", err)
}
} else {
// something else happened, we log the error and don't start the watcher
xlog.Error("failed to read DynamicConfigsDir, watcher will not be started", "error", err)
return
}
}
configHandler := newConfigFileHandler(options)
if err := configHandler.Watch(); err != nil {
xlog.Error("failed creating watcher", "error", err)
}
}
// loadRuntimeSettingsFromFile loads settings from runtime_settings.json with env var precedence
// This function is called at startup, before env vars are applied via AppOptions.
// Since env vars are applied via AppOptions in run.go, we need to check if they're set.
// We do this by checking if the current options values differ from defaults, which would
// indicate they were set from env vars. However, a simpler approach is to just apply
// file settings here, and let the AppOptions (which are applied after this) override them.
// But actually, this is called AFTER AppOptions are applied in New(), so we need to check env vars.
// The cleanest solution: Store original values before applying file, or check if values match
// what would be set from env vars. For now, we'll apply file settings and they'll be
// overridden by AppOptions if env vars were set (but AppOptions are already applied).
// Actually, this function is called in New() before AppOptions are fully processed for watchdog.
// Let's check the call order: New() -> loadRuntimeSettingsFromFile() -> initializeWatchdog()
// But AppOptions are applied in NewApplicationConfig() which is called first.
// So at this point, options already has values from env vars. We should compare against
// defaults to see if env vars were set. But we don't have defaults stored.
// Simplest: Just apply file settings. If env vars were set, they're already in options.
// The file watcher handler will handle runtime changes properly by comparing with startupAppConfig.
func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
settingsFile := filepath.Join(options.DynamicConfigsDir, "runtime_settings.json")
fileContent, err := os.ReadFile(settingsFile)
if err != nil {
if os.IsNotExist(err) {
xlog.Debug("runtime_settings.json not found, using defaults")
return
}
xlog.Warn("failed to read runtime_settings.json", "error", err)
return
}
var settings config.RuntimeSettings
if err := json.Unmarshal(fileContent, &settings); err != nil {
xlog.Warn("failed to parse runtime_settings.json", "error", err)
return
}
// At this point, options already has values from env vars (via AppOptions in run.go).
// To avoid env var duplication, we determine if env vars were set by checking if
// current values differ from defaults. Defaults are: false for bools, 0 for durations.
// If current value is at default, it likely wasn't set from env var, so we can apply file.
// If current value is non-default, it was likely set from env var, so we preserve it.
// Note: This means env vars explicitly setting to false/0 won't be distinguishable from defaults,
// but that's an acceptable limitation to avoid env var duplication.
if settings.WatchdogIdleEnabled != nil {
// Only apply if current value is default (false), suggesting it wasn't set from env var
if !options.WatchDogIdle {
options.WatchDogIdle = *settings.WatchdogIdleEnabled
if options.WatchDogIdle {
options.WatchDog = true
}
}
}
if settings.WatchdogBusyEnabled != nil {
if !options.WatchDogBusy {
options.WatchDogBusy = *settings.WatchdogBusyEnabled
if options.WatchDogBusy {
options.WatchDog = true
}
}
}
if settings.WatchdogIdleTimeout != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.WatchDogIdleTimeout == 0 {
dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
if err == nil {
options.WatchDogIdleTimeout = dur
} else {
xlog.Warn("invalid watchdog idle timeout in runtime_settings.json", "error", err, "timeout", *settings.WatchdogIdleTimeout)
}
}
}
if settings.WatchdogBusyTimeout != nil {
if options.WatchDogBusyTimeout == 0 {
dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
if err == nil {
options.WatchDogBusyTimeout = dur
} else {
xlog.Warn("invalid watchdog busy timeout in runtime_settings.json", "error", err, "timeout", *settings.WatchdogBusyTimeout)
}
}
}
if settings.WatchdogInterval != nil {
if options.WatchDogInterval == 0 {
dur, err := time.ParseDuration(*settings.WatchdogInterval)
if err == nil {
options.WatchDogInterval = dur
} else {
xlog.Warn("invalid watchdog interval in runtime_settings.json", "error", err, "interval", *settings.WatchdogInterval)
options.WatchDogInterval = model.DefaultWatchdogInterval
}
}
}
// Handle MaxActiveBackends (new) and SingleBackend (deprecated)
if settings.MaxActiveBackends != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.MaxActiveBackends == 0 {
options.MaxActiveBackends = *settings.MaxActiveBackends
// For backward compatibility, also set SingleBackend if MaxActiveBackends == 1
options.SingleBackend = (*settings.MaxActiveBackends == 1)
}
} else if settings.SingleBackend != nil {
// Legacy: SingleBackend maps to MaxActiveBackends = 1
if !options.SingleBackend {
options.SingleBackend = *settings.SingleBackend
if *settings.SingleBackend {
options.MaxActiveBackends = 1
}
}
}
if settings.MemoryReclaimerEnabled != nil {
// Only apply if current value is default (false), suggesting it wasn't set from env var
if !options.MemoryReclaimerEnabled {
options.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
if options.MemoryReclaimerEnabled {
options.WatchDog = true // Memory reclaimer requires watchdog
}
}
}
if settings.MemoryReclaimerThreshold != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.MemoryReclaimerThreshold == 0 {
options.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
}
}
if settings.ForceEvictionWhenBusy != nil {
// Only apply if current value is default (false), suggesting it wasn't set from env var
if !options.ForceEvictionWhenBusy {
options.ForceEvictionWhenBusy = *settings.ForceEvictionWhenBusy
}
}
if settings.LRUEvictionMaxRetries != nil {
// Only apply if current value is default (30), suggesting it wasn't set from env var
if options.LRUEvictionMaxRetries == 0 {
options.LRUEvictionMaxRetries = *settings.LRUEvictionMaxRetries
}
}
if settings.LRUEvictionRetryInterval != nil {
// Only apply if current value is default (1s), suggesting it wasn't set from env var
if options.LRUEvictionRetryInterval == 0 {
dur, err := time.ParseDuration(*settings.LRUEvictionRetryInterval)
if err == nil {
options.LRUEvictionRetryInterval = dur
} else {
xlog.Warn("invalid LRU eviction retry interval in runtime_settings.json", "error", err, "interval", *settings.LRUEvictionRetryInterval)
}
}
}
if settings.AgentJobRetentionDays != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.AgentJobRetentionDays == 0 {
options.AgentJobRetentionDays = *settings.AgentJobRetentionDays
}
}
if !options.WatchDogIdle && !options.WatchDogBusy {
if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
options.WatchDog = true
}
}
// P2P settings
if settings.P2PToken != nil {
if options.P2PToken == "" {
options.P2PToken = *settings.P2PToken
}
}
if settings.P2PNetworkID != nil {
if options.P2PNetworkID == "" {
options.P2PNetworkID = *settings.P2PNetworkID
}
}
if settings.Federated != nil {
if !options.Federated {
options.Federated = *settings.Federated
}
}
if settings.EnableBackendLogging != nil {
if !options.EnableBackendLogging {
options.EnableBackendLogging = *settings.EnableBackendLogging
}
}
// Tracing settings
if settings.EnableTracing != nil {
if !options.EnableTracing {
options.EnableTracing = *settings.EnableTracing
}
}
if settings.TracingMaxItems != nil {
if options.TracingMaxItems == 0 {
options.TracingMaxItems = *settings.TracingMaxItems
}
}
feat(branding): admin-configurable instance name, tagline, and assets (#9635) Adds a whitelabeling feature so an operator can replace the LocalAI instance name, tagline, square logo, horizontal logo, and favicon from the admin Settings page. Defaults fall back to the bundled assets so existing installs are unaffected. The public GET /api/branding endpoint is reachable pre-auth so the login screen can render the configured branding before sign-in. Mutating routes (POST/DELETE /api/branding/asset/:kind) remain admin-only. Text fields (instance_name, instance_tagline) ride the existing /api/settings flow; binary assets get a dedicated multipart upload route that persists files under DynamicConfigsDir/branding/. To prevent the Settings page's stale local state from clobbering an upload on save, UpdateSettingsEndpoint preserves whatever the on-disk asset filename fields are regardless of the body — /api/branding/asset/* are the sole writers for those fields. The MCP catalog gains get_branding and set_branding tools (text fields only; file upload stays UI-only) plus a configure_branding skill prompt. While wiring this up, the same restart-loss class of bug surfaced for several existing fields whose RuntimeSettings entries were never read by the startup loader. Fix loadRuntimeSettingsFromFile() to load: - branding (instance_name, instance_tagline, *_file basenames) - auto_upgrade_backends, prefer_development_backends - localai_assistant_enabled - open_responses_store_ttl - the 7 existing AgentPool fields (enabled, default/embedding model, chunking sizes, enable_logs, collection_db_path) Also exposes 3 new AgentPool runtime settings (vector_engine, database_url, agent_hub_url) via /api/settings + the Settings UI, with the same load-on-startup wiring. The file watcher's manual-edit path is intentionally not changed — the in-process API endpoints already update appConfig directly, so the watcher is redundant for supported flows and a separate refactor for everything else. 15 TDD specs cover the loader behaviour (1 branding + 11 adjacent + 3 new agent-pool); 2 specs cover the persistence helpers and the clobber-prevention contract. Assisted-by: claude-code:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-02 13:51:36 +00:00
// Branding / whitelabeling. There are no env vars for these — the file is
// the only source — so apply unconditionally. Without this block a server
// restart silently drops the configured instance name, tagline, and asset
// filenames.
if settings.InstanceName != nil {
options.Branding.InstanceName = *settings.InstanceName
}
if settings.InstanceTagline != nil {
options.Branding.InstanceTagline = *settings.InstanceTagline
}
if settings.LogoFile != nil {
options.Branding.LogoFile = *settings.LogoFile
}
if settings.LogoHorizontalFile != nil {
options.Branding.LogoHorizontalFile = *settings.LogoHorizontalFile
}
if settings.FaviconFile != nil {
options.Branding.FaviconFile = *settings.FaviconFile
}
// Backend upgrade flags
if settings.AutoUpgradeBackends != nil {
if !options.AutoUpgradeBackends {
options.AutoUpgradeBackends = *settings.AutoUpgradeBackends
}
}
if settings.PreferDevelopmentBackends != nil {
if !options.PreferDevelopmentBackends {
options.PreferDevelopmentBackends = *settings.PreferDevelopmentBackends
}
}
// LocalAI Assistant — file-stored as the negation (LocalAIAssistantEnabled).
// Default is enabled (DisableLocalAIAssistant=false). Apply the file value
// unless env explicitly disabled the assistant (DisableLocalAIAssistant=true).
if settings.LocalAIAssistantEnabled != nil {
if !options.DisableLocalAIAssistant {
options.DisableLocalAIAssistant = !*settings.LocalAIAssistantEnabled
}
}
// Open Responses TTL. Default is 0 (no expiration). Treat the on-disk
// "0"/empty as "no expiration" — a no-op since options is already 0 —
// and parse anything else as a duration.
if settings.OpenResponsesStoreTTL != nil && options.OpenResponsesStoreTTL == 0 {
v := *settings.OpenResponsesStoreTTL
if v != "0" && v != "" {
if dur, err := time.ParseDuration(v); err == nil {
options.OpenResponsesStoreTTL = dur
} else {
xlog.Warn("invalid open_responses_store_ttl in runtime_settings.json", "error", err, "ttl", v)
}
}
}
// Agent Pool. NewApplicationConfig seeds non-zero defaults for some of
// these fields (Enabled=true, EmbeddingModel="granite-embedding-107m-
// multilingual", MaxChunkingSize=400). The "if at default, apply file"
// gate uses each field's actual default literal so file values can
// override the bootstrap default while still letting an env-set value
// (e.g. WithAgentPoolEmbeddingModel from a flag) win.
if settings.AgentPoolEnabled != nil && options.AgentPool.Enabled {
options.AgentPool.Enabled = *settings.AgentPoolEnabled
}
if settings.AgentPoolDefaultModel != nil && options.AgentPool.DefaultModel == "" {
options.AgentPool.DefaultModel = *settings.AgentPoolDefaultModel
}
if settings.AgentPoolEmbeddingModel != nil {
if options.AgentPool.EmbeddingModel == "" || options.AgentPool.EmbeddingModel == "granite-embedding-107m-multilingual" {
options.AgentPool.EmbeddingModel = *settings.AgentPoolEmbeddingModel
}
}
if settings.AgentPoolMaxChunkingSize != nil {
if options.AgentPool.MaxChunkingSize == 0 || options.AgentPool.MaxChunkingSize == 400 {
options.AgentPool.MaxChunkingSize = *settings.AgentPoolMaxChunkingSize
}
}
if settings.AgentPoolChunkOverlap != nil && options.AgentPool.ChunkOverlap == 0 {
options.AgentPool.ChunkOverlap = *settings.AgentPoolChunkOverlap
}
if settings.AgentPoolEnableLogs != nil && !options.AgentPool.EnableLogs {
options.AgentPool.EnableLogs = *settings.AgentPoolEnableLogs
}
if settings.AgentPoolCollectionDBPath != nil && options.AgentPool.CollectionDBPath == "" {
options.AgentPool.CollectionDBPath = *settings.AgentPoolCollectionDBPath
}
if settings.AgentPoolVectorEngine != nil {
// Default is "chromem"; treat both that and empty as "not env-set".
if options.AgentPool.VectorEngine == "" || options.AgentPool.VectorEngine == "chromem" {
options.AgentPool.VectorEngine = *settings.AgentPoolVectorEngine
}
}
if settings.AgentPoolDatabaseURL != nil && options.AgentPool.DatabaseURL == "" {
options.AgentPool.DatabaseURL = *settings.AgentPoolDatabaseURL
}
if settings.AgentPoolAgentHubURL != nil {
// Default is "https://agenthub.localai.io"; treat both that and empty
// as "not env-set".
if options.AgentPool.AgentHubURL == "" || options.AgentPool.AgentHubURL == "https://agenthub.localai.io" {
options.AgentPool.AgentHubURL = *settings.AgentPoolAgentHubURL
}
}
xlog.Debug("Runtime settings loaded from runtime_settings.json")
}
// initializeWatchdog initializes the watchdog with current ApplicationConfig settings
func initializeWatchdog(application *Application, options *config.ApplicationConfig) {
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
lruLimit := options.GetEffectiveMaxActiveBackends()
// Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled
if options.WatchDog || lruLimit > 0 || options.MemoryReclaimerEnabled {
wd := model.NewWatchDog(
model.WithProcessManager(application.ModelLoader()),
model.WithBusyTimeout(options.WatchDogBusyTimeout),
model.WithIdleTimeout(options.WatchDogIdleTimeout),
model.WithWatchdogInterval(options.WatchDogInterval),
model.WithBusyCheck(options.WatchDogBusy),
model.WithIdleCheck(options.WatchDogIdle),
model.WithLRULimit(lruLimit),
model.WithMemoryReclaimer(options.MemoryReclaimerEnabled, options.MemoryReclaimerThreshold),
model.WithForceEvictionWhenBusy(options.ForceEvictionWhenBusy),
)
application.ModelLoader().SetWatchDog(wd)
// Initialize ModelLoader LRU eviction retry settings
application.ModelLoader().SetLRUEvictionRetrySettings(
options.LRUEvictionMaxRetries,
options.LRUEvictionRetryInterval,
)
feat(concurrency-groups): per-model exclusive groups for backend loading (#9662) * feat(concurrency-groups): per-model exclusive groups for backend loading Adds `concurrency_groups: [...]` to model YAML configs. Two models that share a group cannot be loaded concurrently on the same node — loading one evicts the others, reusing the existing pinned/busy/retry policy from LRU eviction. Layered design: - Watchdog (pkg/model): per-node correctness floor — on every Load(), evict any loaded model that shares a group with the requested one. Pinned skips surface NeedMore so the loader retries (and ultimately logs a clear warning), instead of silently allowing the rule to be violated. - Distributed scheduler (core/services/nodes): soft anti-affinity hint — scheduleNewModel prefers nodes that don't already host a same-group model, falling back to eviction only if every candidate has a conflict. Composes with NodeSelector at the same point in the candidate pipeline. Per-node, not cluster-wide: VRAM is a node-local resource, and two heavy models running on different nodes is fine. The ConfigLoader is wired into SmartRouter via a small ConcurrencyConflictResolver interface so the nodes package keeps a narrow surface on core/config. Refactors the inner LRU eviction body into a shared collectEvictionsLocked helper and the loader retry loop into retryEnforce(fn, maxRetries, interval), so both LRU and group enforcement share busy/pinned/retry semantics. Closes #9659. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(watchdog): sync pinned + concurrency_groups at startup The startup-time watchdog setup lives in initializeWatchdog (startup.go), not in startWatchdog (watchdog.go). The latter is only invoked from the runtime-settings RestartWatchdog path. As a result, neither SyncPinnedModelsToWatchdog nor SyncModelGroupsToWatchdog ran at boot, so `pinned: true` and `concurrency_groups: [...]` only became effective after a settings-driven watchdog restart. Fix by adding both sync calls to initializeWatchdog. Confirmed end-to-end: loading model A in group "heavy", then C with no group (coexists), then B in group "heavy" now correctly evicts A and leaves [B, C]. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(test): satisfy errcheck on new os.Remove in concurrency_groups spec CI lint runs new-from-merge-base, so the existing pre-existing `defer os.Remove(tmp.Name())` lines are baseline-grandfathered but the one introduced by the concurrency_groups YAML round-trip test is held to errcheck. Wrap the remove in a closure that discards the error. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-05 06:42:50 +00:00
// Sync per-model state from configs to the watchdog. Without this,
// `pinned: true` and `concurrency_groups:` are only honored after a
// settings-driven RestartWatchdog and never at boot.
application.SyncPinnedModelsToWatchdog()
application.SyncModelGroupsToWatchdog()
// Start watchdog goroutine if any periodic checks are enabled
// LRU eviction doesn't need the Run() loop - it's triggered on model load
// But memory reclaimer needs the Run() loop for periodic checking
if options.WatchDogBusy || options.WatchDogIdle || options.MemoryReclaimerEnabled {
go wd.Run()
}
go func() {
<-options.Context.Done()
xlog.Debug("Context canceled, shutting down")
wd.Shutdown()
}()
}
}
// loadOrGenerateHMACSecret loads an HMAC secret from the given file path,
// or generates a random 32-byte secret and persists it if the file doesn't exist.
func loadOrGenerateHMACSecret(path string) (string, error) {
data, err := os.ReadFile(path)
if err == nil {
secret := string(data)
if len(secret) >= 32 {
return secret, nil
}
}
b := make([]byte, 32)
if _, err := rand.Read(b); err != nil {
return "", fmt.Errorf("failed to generate HMAC secret: %w", err)
}
secret := hex.EncodeToString(b)
if err := os.WriteFile(path, []byte(secret), 0600); err != nil {
return "", fmt.Errorf("failed to persist HMAC secret: %w", err)
}
xlog.Info("Generated new HMAC secret for API key hashing", "path", path)
return secret, nil
}
// migrateDataFiles moves persistent data files from the old config directory
// to the new data directory. Only moves files that exist in src but not in dst.
func migrateDataFiles(srcDir, dstDir string) {
// Files and directories to migrate
items := []string{
"agent_tasks.json",
"agent_jobs.json",
"collections",
"assets",
}
migrated := false
for _, item := range items {
srcPath := filepath.Join(srcDir, item)
dstPath := filepath.Join(dstDir, item)
// Only migrate if source exists and destination does not
if _, err := os.Stat(srcPath); os.IsNotExist(err) {
continue
}
if _, err := os.Stat(dstPath); err == nil {
continue // destination already exists, skip
}
if err := os.Rename(srcPath, dstPath); err != nil {
xlog.Warn("Failed to migrate data file, will copy instead", "src", srcPath, "dst", dstPath, "error", err)
// os.Rename fails across filesystems, fall back to leaving in place
// and log a warning for the user to manually move
xlog.Warn("Data file remains in old location, please move manually", "src", srcPath, "dst", dstPath)
continue
}
migrated = true
xlog.Info("Migrated data file to new data path", "src", srcPath, "dst", dstPath)
}
if migrated {
xlog.Info("Data migration complete", "from", srcDir, "to", dstDir)
}
}