2023-04-11 22:02:39 +00:00
package model
2023-04-07 09:30:59 +00:00
import (
2023-07-14 23:19:43 +00:00
"context"
2023-04-07 09:30:59 +00:00
"fmt"
2025-06-15 12:56:52 +00:00
"maps"
2023-04-07 09:30:59 +00:00
"os"
"path/filepath"
2023-04-10 10:02:40 +00:00
"strings"
2023-04-07 09:30:59 +00:00
"sync"
2024-08-30 13:20:39 +00:00
"time"
2023-04-07 09:30:59 +00:00
2025-08-14 17:38:26 +00:00
"github.com/mudler/LocalAI/pkg/system"
2024-06-23 08:24:36 +00:00
"github.com/mudler/LocalAI/pkg/utils"
2024-04-19 02:40:18 +00:00
2025-12-21 18:33:13 +00:00
"github.com/mudler/xlog"
2023-04-07 09:30:59 +00:00
)
2023-07-22 15:31:39 +00:00
// new idea: what if we declare a struct of these here, and use a loop to check?
2025-05-16 10:45:48 +00:00
// TODO: Split ModelLoader and TemplateLoader to keep things more organized? They are left together for now so they can share a mutex; splitting makes more sense if we separate the directories for .bin/.yaml and .tmpl files.
2026-03-11 06:30:49 +00:00
// ModelUnloadHook is a callback invoked when a model is about to be
// unloaded. It receives the name of that model as its only argument.
// Hooks are registered with ModelLoader.OnModelUnload.
type ModelUnloadHook func(modelName string)
2023-04-07 09:30:59 +00:00
type ModelLoader struct {
2025-12-25 13:26:18 +00:00
ModelPath string
mu sync . Mutex
models map [ string ] * Model
loading map [ string ] chan struct { } // tracks models currently being loaded
wd * WatchDog
externalBackends map [ string ] string
lruEvictionMaxRetries int // Maximum number of retries when waiting for busy models
lruEvictionRetryInterval time . Duration // Interval between retries when waiting for busy models
2026-03-11 06:30:49 +00:00
onUnloadHooks [ ] ModelUnloadHook
2023-04-07 09:30:59 +00:00
}
2025-12-12 11:28:38 +00:00
// NewModelLoader creates a new ModelLoader instance.
// LRU eviction is now managed through the WatchDog component.
func NewModelLoader ( system * system . SystemState ) * ModelLoader {
2023-07-22 15:31:39 +00:00
nml := & ModelLoader {
2025-12-25 13:26:18 +00:00
ModelPath : system . Model . ModelsPath ,
models : make ( map [ string ] * Model ) ,
loading : make ( map [ string ] chan struct { } ) ,
externalBackends : make ( map [ string ] string ) ,
lruEvictionMaxRetries : 30 , // Default: 30 retries
lruEvictionRetryInterval : 1 * time . Second , // Default: 1 second
2023-04-20 22:06:55 +00:00
}
2023-11-26 17:36:23 +00:00
2023-07-22 15:31:39 +00:00
return nml
2023-04-07 09:30:59 +00:00
}
2025-12-12 11:28:38 +00:00
// GetLoadingCount reports how many models are currently being loaded, i.e.
// the number of entries in the loading map at the time of the call.
func (ml *ModelLoader) GetLoadingCount() int {
	ml.mu.Lock()
	inFlight := len(ml.loading)
	ml.mu.Unlock()
	return inFlight
}
2026-03-11 06:30:49 +00:00
// OnModelUnload registers hook to be called when a model is unloaded.
// Hooks are appended, so they are kept in registration order.
func (ml *ModelLoader) OnModelUnload(hook ModelUnloadHook) {
	ml.mu.Lock()
	ml.onUnloadHooks = append(ml.onUnloadHooks, hook)
	ml.mu.Unlock()
}
2023-11-26 17:36:23 +00:00
// SetWatchDog replaces the loader's watchdog with wd.
// The assignment is performed without taking the loader's mutex.
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
	ml.wd = wd
}
2025-11-20 21:37:20 +00:00
// GetWatchDog returns the loader's current watchdog. The result is nil when
// no watchdog has been set. The field is read without taking the loader's
// mutex.
func (ml *ModelLoader) GetWatchDog() *WatchDog {
	return ml.wd
}
2025-12-25 13:26:18 +00:00
// SetLRUEvictionRetrySettings updates the LRU eviction retry settings.
//
// maxRetries is the maximum number of retries when waiting for busy models,
// and retryInterval is the interval between those retries. Both values are
// stored as given, under the loader's mutex.
func (ml *ModelLoader) SetLRUEvictionRetrySettings(maxRetries int, retryInterval time.Duration) {
	ml.mu.Lock()
	ml.lruEvictionMaxRetries, ml.lruEvictionRetryInterval = maxRetries, retryInterval
	ml.mu.Unlock()
}
2023-04-20 16:33:02 +00:00
func ( ml * ModelLoader ) ExistsInModelPath ( s string ) bool {
2024-04-19 02:40:18 +00:00
return utils . ExistsInPath ( ml . ModelPath , s )
2023-04-20 16:33:02 +00:00
}
2025-06-15 12:56:52 +00:00
// SetExternalBackend records uri as the location of the external backend
// called name, replacing any previous entry for that name.
func (ml *ModelLoader) SetExternalBackend(name, uri string) {
	ml.mu.Lock()
	ml.externalBackends[name] = uri
	ml.mu.Unlock()
}
// DeleteExternalBackend removes the external backend registered under name.
// Removing a name that is not registered is a no-op.
func (ml *ModelLoader) DeleteExternalBackend(name string) {
	ml.mu.Lock()
	delete(ml.externalBackends, name)
	ml.mu.Unlock()
}
// GetExternalBackend returns the URI registered for the external backend
// called name, or the empty string when no such backend is registered.
func (ml *ModelLoader) GetExternalBackend(name string) string {
	ml.mu.Lock()
	uri := ml.externalBackends[name]
	ml.mu.Unlock()
	return uri
}
// GetAllExternalBackends returns a fresh map combining the loader's external
// backends with the ones carried by o. When both define the same name, the
// entry from o wins. o may be nil, in which case only the loader's backends
// are returned. The result is a copy: mutating it does not affect the loader
// or o.
//
// The loader's map is snapshotted while holding ml.mu, matching
// SetExternalBackend, DeleteExternalBackend and GetExternalBackend, so this
// method is safe to call concurrently with them. The mutex is not reentrant,
// so callers must not already hold ml.mu.
func (ml *ModelLoader) GetAllExternalBackends(o *Options) map[string]string {
	backends := make(map[string]string)

	// Hold the lock only for the copy of the loader-owned map.
	ml.mu.Lock()
	maps.Copy(backends, ml.externalBackends)
	ml.mu.Unlock()

	if o != nil {
		// Copied second so per-call options override loader-wide entries.
		maps.Copy(backends, o.externalBackends)
	}
	return backends
}
2024-07-10 13:28:39 +00:00
// knownFilesToSkip lists file names that ListFilesInModelPath never reports
// as models. Names are compared case-insensitively.
var knownFilesToSkip = []string{
	"MODEL_CARD",
	"README",
	"README.md",
}
// knownModelsNameSuffixToSkip lists file-name suffixes that
// ListFilesInModelPath never reports as models. Matching is a
// case-sensitive suffix comparison, which is why both ".md" and ".MD"
// appear.
var knownModelsNameSuffixToSkip = []string{
	".tmpl",
	".keep",
	".yaml",
	".yml",
	".json",
	".txt",
	".pt",
	".onnx",
	".md",
	".MD",
	".DS_Store",
	".",
	".safetensors",
	".bin",
	".gguf",
	".ggml",
	".partial",
	".tar.gz",
}
2024-09-17 04:50:57 +00:00
// retryTimeout is a two-minute time.Duration constant.
// NOTE(review): its users are not visible in this part of the file; confirm
// what it bounds before changing the value.
const retryTimeout = 2 * time.Minute
2024-07-10 13:28:39 +00:00
func ( ml * ModelLoader ) ListFilesInModelPath ( ) ( [ ] string , error ) {
2023-07-22 15:31:39 +00:00
files , err := os . ReadDir ( ml . ModelPath )
2023-04-10 10:02:40 +00:00
if err != nil {
return [ ] string { } , err
}
models := [ ] string { }
2024-07-10 13:28:39 +00:00
FILE :
2023-04-10 10:02:40 +00:00
for _ , file := range files {
2024-07-10 13:28:39 +00:00
for _ , skip := range knownFilesToSkip {
if strings . EqualFold ( file . Name ( ) , skip ) {
continue FILE
}
}
// Skip templates, YAML, .keep, .json, and .DS_Store files
for _ , skip := range knownModelsNameSuffixToSkip {
if strings . HasSuffix ( file . Name ( ) , skip ) {
continue FILE
}
}
// Skip directories
if file . IsDir ( ) {
2023-04-20 16:33:02 +00:00
continue
2023-04-10 10:02:40 +00:00
}
2023-04-20 16:33:02 +00:00
models = append ( models , file . Name ( ) )
2023-04-10 10:02:40 +00:00
}
return models , nil
}
2025-08-16 05:44:50 +00:00
func ( ml * ModelLoader ) ListLoadedModels ( ) [ ] * Model {
2024-08-30 13:20:39 +00:00
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
2024-11-14 13:12:29 +00:00
models := [ ] * Model { }
2024-08-30 13:20:39 +00:00
for _ , model := range ml . models {
2024-11-14 13:12:29 +00:00
models = append ( models , model )
2024-08-30 13:20:39 +00:00
}
return models
}
2024-10-02 06:55:58 +00:00
func ( ml * ModelLoader ) LoadModel ( modelID , modelName string , loader func ( string , string , string ) ( * Model , error ) ) ( * Model , error ) {
2025-10-16 19:28:19 +00:00
ml . mu . Lock ( )
2023-05-10 23:12:58 +00:00
// Check if we already have a loaded model
2025-10-16 19:28:19 +00:00
if model := ml . checkIsLoaded ( modelID ) ; model != nil {
2025-12-12 11:28:38 +00:00
ml . mu . Unlock ( )
2023-07-14 23:19:43 +00:00
return model , nil
2023-04-19 15:10:29 +00:00
}
2023-04-20 16:33:02 +00:00
2025-12-12 11:28:38 +00:00
// Check if another goroutine is already loading this model
if loadingChan , isLoading := ml . loading [ modelID ] ; isLoading {
ml . mu . Unlock ( )
// Wait for the other goroutine to finish loading
2025-12-21 18:33:13 +00:00
xlog . Debug ( "Waiting for model to be loaded by another request" , "modelID" , modelID )
2025-12-12 11:28:38 +00:00
<- loadingChan
// Now check if the model is loaded
ml . mu . Lock ( )
model := ml . checkIsLoaded ( modelID )
ml . mu . Unlock ( )
if model != nil {
return model , nil
}
// If still not loaded, the other goroutine failed - we'll try again
return ml . LoadModel ( modelID , modelName , loader )
}
// Mark this model as loading (create a channel that will be closed when done)
loadingChan := make ( chan struct { } )
ml . loading [ modelID ] = loadingChan
ml . mu . Unlock ( )
// Ensure we clean up the loading state when done
defer func ( ) {
ml . mu . Lock ( )
delete ( ml . loading , modelID )
close ( loadingChan )
ml . mu . Unlock ( )
} ( )
// Load the model (this can take a long time, no lock held)
2023-04-27 04:18:18 +00:00
modelFile := filepath . Join ( ml . ModelPath , modelName )
2025-12-21 18:33:13 +00:00
xlog . Debug ( "Loading model in memory from file" , "file" , modelFile )
2023-04-20 16:33:02 +00:00
2024-10-02 06:55:58 +00:00
model , err := loader ( modelID , modelName , modelFile )
2023-04-19 15:10:29 +00:00
if err != nil {
2024-10-02 18:37:40 +00:00
return nil , fmt . Errorf ( "failed to load model with internal loader: %s" , err )
2023-04-19 15:10:29 +00:00
}
2024-08-30 13:20:39 +00:00
if model == nil {
return nil , fmt . Errorf ( "loader didn't return a model" )
}
2025-12-12 11:28:38 +00:00
// Add to models map
ml . mu . Lock ( )
2024-10-02 06:55:58 +00:00
ml . models [ modelID ] = model
2025-12-12 11:28:38 +00:00
ml . mu . Unlock ( )
2024-08-30 13:20:39 +00:00
2023-05-11 14:34:16 +00:00
return model , nil
2023-05-05 09:20:06 +00:00
}
2023-07-14 23:19:43 +00:00
2023-08-23 16:38:37 +00:00
func ( ml * ModelLoader ) ShutdownModel ( modelName string ) error {
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
2024-09-17 04:50:57 +00:00
return ml . deleteProcess ( modelName )
2023-08-23 16:38:37 +00:00
}
2024-08-25 12:36:09 +00:00
func ( ml * ModelLoader ) CheckIsLoaded ( s string ) * Model {
2024-09-17 14:51:40 +00:00
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
2025-10-16 19:28:19 +00:00
return ml . checkIsLoaded ( s )
}
func ( ml * ModelLoader ) checkIsLoaded ( s string ) * Model {
2024-08-25 12:36:09 +00:00
m , ok := ml . models [ s ]
if ! ok {
return nil
2023-07-14 23:19:43 +00:00
}
2025-12-21 18:33:13 +00:00
xlog . Debug ( "Model already loaded in memory" , "model" , s )
2024-08-30 13:20:39 +00:00
client := m . GRPC ( false , ml . wd )
2025-12-21 18:33:13 +00:00
xlog . Debug ( "Checking model availability" , "model" , s )
2024-08-30 13:20:39 +00:00
cTimeout , cancel := context . WithTimeout ( context . Background ( ) , 2 * time . Minute )
defer cancel ( )
alive , err := client . HealthCheck ( cTimeout )
2024-08-25 12:36:09 +00:00
if ! alive {
2025-12-21 18:33:13 +00:00
xlog . Warn ( "GRPC Model not responding" , "error" , err )
xlog . Warn ( "Deleting the process in order to recreate it" )
2024-09-26 10:44:55 +00:00
process := m . Process ( )
if process == nil {
2025-12-21 18:33:13 +00:00
xlog . Error ( "Process not found and the model is not responding anymore" , "model" , s )
2024-08-30 13:20:39 +00:00
return m
}
if ! process . IsAlive ( ) {
2025-12-21 18:33:13 +00:00
xlog . Debug ( "GRPC Process is not responding" , "model" , s )
2024-08-25 12:36:09 +00:00
// stop and delete the process, this forces to re-load the model and re-create again the service
err := ml . deleteProcess ( s )
if err != nil {
2025-12-21 18:33:13 +00:00
xlog . Error ( "error stopping process" , "error" , err , "process" , s )
2024-08-25 12:36:09 +00:00
}
return nil
}
2023-07-22 15:31:39 +00:00
}
2024-08-25 12:36:09 +00:00
return m
2023-07-22 15:31:39 +00:00
}