2024-04-11 07:19:24 +00:00
package cli
import (
"context"
2026-02-01 16:33:17 +00:00
"encoding/json"
2024-04-11 07:19:24 +00:00
"fmt"
2026-02-01 16:33:17 +00:00
"strings"
2024-04-11 07:19:24 +00:00
2024-06-23 08:24:36 +00:00
"github.com/mudler/LocalAI/core/backend"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
2026-02-01 16:33:17 +00:00
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
2024-06-23 08:24:36 +00:00
"github.com/mudler/LocalAI/pkg/model"
2025-08-14 17:38:26 +00:00
"github.com/mudler/LocalAI/pkg/system"
2025-12-21 18:33:13 +00:00
"github.com/mudler/xlog"
2024-04-11 07:19:24 +00:00
)
type TranscriptCMD struct {
2026-02-01 16:33:17 +00:00
Filename string ` arg:"" name:"file" help:"Audio file to transcribe" type:"path" `
2024-04-11 07:19:24 +00:00
2026-02-01 16:33:17 +00:00
Backend string ` short:"b" default:"whisper" help:"Backend to run the transcription model" `
Model string ` short:"m" required:"" help:"Model name to run the TTS" `
Language string ` short:"l" help:"Language of the audio file" `
Translate bool ` short:"c" help:"Translate the transcription to English" `
Diarize bool ` short:"d" help:"Mark speaker turns" `
Threads int ` short:"t" default:"1" help:"Number of threads used for parallel computation" `
BackendsPath string ` env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"$ { basepath}/backends" help:"Path containing backends used for inferencing" group:"storage" `
ModelsPath string ` env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"$ { basepath}/models" help:"Path containing models used for inferencing" group:"storage" `
BackendGalleries string ` env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"$ { backends}" `
Prompt string ` short:"p" help:"Previous transcribed text or words that hint at what the model should expect" `
2026-02-23 17:57:06 +00:00
ResponseFormat schema . TranscriptionResponseFormatType ` short:"f" default:"" help:"Response format for Whisper models, can be one of (txt, lrc, srt, vtt, json, verbose_json)" `
PrettyPrint bool ` help:"Used with response_format json or verbose_json for pretty printing" `
2024-04-11 07:19:24 +00:00
}
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov's implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp,
it is now possible to distribute the workload to remote llama.cpp gRPC servers.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers using the DHT and mDNS discovery protocols;
the llama.cpp RPC server is automatically started and exposed to the underlying p2p network so that the API server can connect to it.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
func ( t * TranscriptCMD ) Run ( ctx * cliContext . Context ) error {
2025-08-14 17:38:26 +00:00
systemState , err := system . GetSystemState (
2026-02-01 16:33:17 +00:00
system . WithBackendPath ( t . BackendsPath ) ,
2025-08-14 17:38:26 +00:00
system . WithModelPath ( t . ModelsPath ) ,
)
if err != nil {
return err
}
2024-04-11 07:19:24 +00:00
opts := & config . ApplicationConfig {
2025-08-14 17:38:26 +00:00
SystemState : systemState ,
Context : context . Background ( ) ,
2024-04-11 07:19:24 +00:00
}
2025-08-14 17:38:26 +00:00
cl := config . NewModelConfigLoader ( t . ModelsPath )
2025-12-12 11:28:38 +00:00
ml := model . NewModelLoader ( systemState )
2026-02-01 16:33:17 +00:00
if err := gallery . RegisterBackends ( systemState , ml ) ; err != nil {
xlog . Error ( "error registering external backends" , "error" , err )
}
2025-08-14 17:38:26 +00:00
if err := cl . LoadModelConfigsFromPath ( t . ModelsPath ) ; err != nil {
2024-04-11 07:19:24 +00:00
return err
}
2025-08-14 17:38:26 +00:00
c , exists := cl . GetModelConfig ( t . Model )
2024-04-11 07:19:24 +00:00
if ! exists {
2026-04-21 09:53:26 +00:00
return fmt . Errorf ( "model %q not found. Run 'local-ai models list' to see available models, or install one with 'local-ai models install <model>'. See https://localai.io/models/ for more information" , t . Model )
2024-04-11 07:19:24 +00:00
}
c . Threads = & t . Threads
2024-04-29 13:11:42 +00:00
defer func ( ) {
err := ml . StopAllGRPC ( )
if err != nil {
2025-12-21 18:33:13 +00:00
xlog . Error ( "unable to stop all grpc processes" , "error" , err )
2024-04-29 13:11:42 +00:00
}
} ( )
2024-04-11 07:19:24 +00:00
2025-12-18 13:40:45 +00:00
tr , err := backend . ModelTranscription ( t . Filename , t . Language , t . Translate , t . Diarize , t . Prompt , ml , c , opts )
2024-04-17 21:33:49 +00:00
if err != nil {
return err
2024-04-11 07:19:24 +00:00
}
2026-02-01 16:33:17 +00:00
switch t . ResponseFormat {
case schema . TranscriptionResponseFormatLrc , schema . TranscriptionResponseFormatSrt , schema . TranscriptionResponseFormatVtt , schema . TranscriptionResponseFormatText :
2026-03-29 22:47:27 +00:00
fmt . Println ( schema . TranscriptionResponse ( tr , t . ResponseFormat ) )
2026-02-01 16:33:17 +00:00
case schema . TranscriptionResponseFormatJson :
tr . Segments = nil
fallthrough
case schema . TranscriptionResponseFormatJsonVerbose :
var mtr [ ] byte
var err error
if t . PrettyPrint {
mtr , err = json . MarshalIndent ( tr , "" , " " )
} else {
mtr , err = json . Marshal ( tr )
}
if err != nil {
return err
}
fmt . Println ( string ( mtr ) )
default :
for _ , segment := range tr . Segments {
fmt . Println ( segment . Start . String ( ) , "-" , strings . TrimSpace ( segment . Text ) )
}
2024-04-11 07:19:24 +00:00
}
return nil
}