2024-04-11 07:19:24 +00:00
package cli
import (
"context"
2026-02-01 16:33:17 +00:00
"encoding/json"
2024-04-11 07:19:24 +00:00
"fmt"
2026-02-01 16:33:17 +00:00
"strings"
2024-04-11 07:19:24 +00:00
2024-06-23 08:24:36 +00:00
"github.com/mudler/LocalAI/core/backend"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
2026-02-01 16:33:17 +00:00
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
2024-06-23 08:24:36 +00:00
"github.com/mudler/LocalAI/pkg/model"
2025-08-14 17:38:26 +00:00
"github.com/mudler/LocalAI/pkg/system"
2025-12-21 18:33:13 +00:00
"github.com/mudler/xlog"
2024-04-11 07:19:24 +00:00
)
type TranscriptCMD struct {
2026-02-01 16:33:17 +00:00
Filename string ` arg:"" name:"file" help:"Audio file to transcribe" type:"path" `
2024-04-11 07:19:24 +00:00
2026-02-01 16:33:17 +00:00
Backend string ` short:"b" default:"whisper" help:"Backend to run the transcription model" `
Model string ` short:"m" required:"" help:"Model name to run the TTS" `
Language string ` short:"l" help:"Language of the audio file" `
Translate bool ` short:"c" help:"Translate the transcription to English" `
Diarize bool ` short:"d" help:"Mark speaker turns" `
Threads int ` short:"t" default:"1" help:"Number of threads used for parallel computation" `
BackendsPath string ` env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"$ { basepath}/backends" help:"Path containing backends used for inferencing" group:"storage" `
ModelsPath string ` env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"$ { basepath}/models" help:"Path containing models used for inferencing" group:"storage" `
BackendGalleries string ` env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"$ { backends}" `
Prompt string ` short:"p" help:"Previous transcribed text or words that hint at what the model should expect" `
2026-02-23 17:57:06 +00:00
ResponseFormat schema . TranscriptionResponseFormatType ` short:"f" default:"" help:"Response format for Whisper models, can be one of (txt, lrc, srt, vtt, json, verbose_json)" `
PrettyPrint bool ` help:"Used with response_format json or verbose_json for pretty printing" `
2024-04-11 07:19:24 +00:00
}
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing, thanks to
@rgerganov's implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp,
it is now possible to distribute the workload to a remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers using the DHT and mDNS discovery protocols;
the llama.cpp RPC server is automatically started and exposed to the underlying p2p network so the API server can connect to it.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
func ( t * TranscriptCMD ) Run ( ctx * cliContext . Context ) error {
2025-08-14 17:38:26 +00:00
systemState , err := system . GetSystemState (
2026-02-01 16:33:17 +00:00
system . WithBackendPath ( t . BackendsPath ) ,
2025-08-14 17:38:26 +00:00
system . WithModelPath ( t . ModelsPath ) ,
)
if err != nil {
return err
}
2024-04-11 07:19:24 +00:00
opts := & config . ApplicationConfig {
2025-08-14 17:38:26 +00:00
SystemState : systemState ,
Context : context . Background ( ) ,
2024-04-11 07:19:24 +00:00
}
2025-08-14 17:38:26 +00:00
cl := config . NewModelConfigLoader ( t . ModelsPath )
2025-12-12 11:28:38 +00:00
ml := model . NewModelLoader ( systemState )
2026-02-01 16:33:17 +00:00
if err := gallery . RegisterBackends ( systemState , ml ) ; err != nil {
xlog . Error ( "error registering external backends" , "error" , err )
}
2025-08-14 17:38:26 +00:00
if err := cl . LoadModelConfigsFromPath ( t . ModelsPath ) ; err != nil {
2024-04-11 07:19:24 +00:00
return err
}
2025-08-14 17:38:26 +00:00
c , exists := cl . GetModelConfig ( t . Model )
2024-04-11 07:19:24 +00:00
if ! exists {
2026-04-21 09:53:26 +00:00
return fmt . Errorf ( "model %q not found. Run 'local-ai models list' to see available models, or install one with 'local-ai models install <model>'. See https://localai.io/models/ for more information" , t . Model )
2024-04-11 07:19:24 +00:00
}
c . Threads = & t . Threads
2024-04-29 13:11:42 +00:00
defer func ( ) {
err := ml . StopAllGRPC ( )
if err != nil {
2025-12-21 18:33:13 +00:00
xlog . Error ( "unable to stop all grpc processes" , "error" , err )
2024-04-29 13:11:42 +00:00
}
} ( )
2024-04-11 07:19:24 +00:00
feat(whisper): honor client cancellation via ggml abort_callback (#9710)
* refactor(transcription): propagate request ctx through ModelTranscription*
Replaces context.Background() with the HTTP request ctx so client
disconnects start cancelling the gRPC call. No backend-side abort wiring
yet — that comes in a later commit. Pure plumbing.
Assisted-by: Claude:claude-haiku-4-5
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix(cli): pass ctx to backend.ModelTranscription
Follow-up to e65d3e1f which threaded ctx through ModelTranscription
but missed the CLI caller. CLI commands have no request-scoped ctx,
so context.Background() is correct here.
Assisted-by: Claude:claude-haiku-4-5
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* refactor(audio): propagate request ctx into TTS, sound-gen, audio-transform
Same ctx-plumbing pattern applied to the rest of the audio path. CLI
callers use context.Background() since there is no request scope; HTTP
callers use c.Request().Context().
Assisted-by: Claude:claude-haiku-4-5
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* refactor(backend): propagate request ctx into biometric, detection, rerank, diarization paths
Replaces remaining context.Background() sites in core/backend with the
caller's ctx. After this commit, every core/backend/*.go entry point
threads the request ctx end-to-end to the gRPC client.
Assisted-by: Claude:claude-haiku-4-5
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* refactor(grpc): plumb ctx through AIModel.AudioTranscription{,Stream}
Adds context.Context as first parameter to the AIModel interface methods
that wrap whisper-style transcription. Server-side gRPC handler now
forwards the per-RPC ctx (server-streaming uses stream.Context()).
Whisper, Voxtral, vibevoice-cpp, and sherpa-onnx accept the parameter;
none uses it yet — the actual cancellation primitive lands in the next
commit so this is pure plumbing.
Assisted-by: Claude:claude-sonnet-4-6
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(whisper): add abort_callback hook in the C++ bridge
Installs a std::atomic<int> flag, wires it into
whisper_full_params.abort_callback, and exposes a set_abort(int) C
symbol so Go can flip the flag from a goroutine watching the request
context. transcribe() now distinguishes abort (return 2) from real
whisper_full failure (return 1).
Assisted-by: Claude:claude-haiku-4-5
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(whisper): register set_abort symbol in the purego loader
Adds the Go-side binding for the new C export so the next commit can
call CppSetAbort(1) from a watcher goroutine on ctx.Done().
Assisted-by: Claude:claude-haiku-4-5
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(whisper): honor ctx cancellation and return codes.Canceled
A watcher goroutine watches ctx.Done() during AudioTranscription and
calls CppSetAbort(1) on cancel. whisper_full sees abort_callback return
true at the next compute graph step, returns non-zero, and the bridge
returns 2 -> AudioTranscription maps that to codes.Canceled.
Adds an opt-in test (gated on WHISPER_MODEL_PATH / WHISPER_AUDIO_PATH)
that asserts cancellation latency under 5s and proves the abort flag
resets cleanly so the next transcription succeeds.
Assisted-by: Claude:claude-sonnet-4-6
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix(whisper): join the cancel watcher goroutine before returning
Follow-up to 85edf9d2. The previous commit used `defer close(done)` and
called the watcher "joined synchronously" — but close() only signals,
it does not block until the goroutine exits. That left a window where
a late CppSetAbort(1) from a cancelled call could land on the next
call, after its C-side g_abort reset but before whisper_full() began
polling the abort callback, corrupting the second transcription.
Switch to a sync.WaitGroup join so wg.Wait() blocks until the watcher
has actually returned from its select.
Assisted-by: Claude:claude-sonnet-4-6
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix(whisper): short-circuit pre-cancelled ctx in AudioTranscription
If ctx is already Done() at entry, return codes.Canceled immediately
instead of running the full transcription. The C-side g_abort reset
happens at the start of transcribe() and would otherwise overwrite a
watcher-set abort flag from an already-cancelled ctx, producing a
spurious successful transcription on a request the client has already
abandoned.
Assisted-by: Claude:claude-haiku-4-5
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix(tests/distributed): update testLLM mock for new AudioTranscription signature
Phase B (93c48e19) added context.Context to AIModel.AudioTranscription
but missed the testLLM mock in tests/e2e/distributed. CI golangci-lint
caught it: *testLLM did not implement grpc.AIModel because the method
signature lacked the ctx parameter, which broke the distributed test
suite compilation and cascaded through every backend-build job that
runs `go build ./...`.
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* test(whisper): port cancellation test to Ginkgo/Gomega
Project policy (.agents/coding-style.md, enforced by golangci-lint
forbidigo) is that all Go tests must use Ginkgo v2 + Gomega — no
stdlib testing patterns (t.Skip, t.Fatalf, etc.). Convert the
cancellation test to a Describe/It block with Skip(...) for env
gating and Expect/HaveOccurred for assertions.
Same coverage: cancel mid-flight returns codes.Canceled within 5s and
a follow-up transcription succeeds, proving the C-side g_abort flag
resets cleanly.
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-07 23:44:47 +00:00
tr , err := backend . ModelTranscription ( context . Background ( ) , t . Filename , t . Language , t . Translate , t . Diarize , t . Prompt , ml , c , opts )
2024-04-17 21:33:49 +00:00
if err != nil {
return err
2024-04-11 07:19:24 +00:00
}
2026-02-01 16:33:17 +00:00
switch t . ResponseFormat {
case schema . TranscriptionResponseFormatLrc , schema . TranscriptionResponseFormatSrt , schema . TranscriptionResponseFormatVtt , schema . TranscriptionResponseFormatText :
2026-03-29 22:47:27 +00:00
fmt . Println ( schema . TranscriptionResponse ( tr , t . ResponseFormat ) )
2026-02-01 16:33:17 +00:00
case schema . TranscriptionResponseFormatJson :
tr . Segments = nil
2026-05-05 22:32:52 +00:00
tr . Words = nil
2026-02-01 16:33:17 +00:00
fallthrough
case schema . TranscriptionResponseFormatJsonVerbose :
2026-05-05 22:32:52 +00:00
trs := schema . TranscriptionResultSeconds {
Text : tr . Text ,
Language : tr . Language ,
Duration : tr . Duration ,
Words : [ ] schema . TranscriptionWordSeconds { } ,
Segments : [ ] schema . TranscriptionSegmentSeconds { } ,
}
for _ , word := range ( tr . Words ) {
trs . Words = append ( trs . Words , schema . TranscriptionWordSeconds {
Start : word . Start . Seconds ( ) ,
End : word . End . Seconds ( ) ,
Text : word . Text ,
} )
}
for _ , seg := range ( tr . Segments ) {
segWords := [ ] schema . TranscriptionWordSeconds { }
for _ , word := range ( seg . Words ) {
segWords = append ( segWords , schema . TranscriptionWordSeconds {
Start : word . Start . Seconds ( ) ,
End : word . End . Seconds ( ) ,
Text : word . Text ,
} )
}
trs . Segments = append ( trs . Segments , schema . TranscriptionSegmentSeconds {
Id : seg . Id ,
Start : seg . Start . Seconds ( ) ,
End : seg . End . Seconds ( ) ,
Text : seg . Text ,
Tokens : seg . Tokens ,
Speaker : seg . Speaker ,
Words : segWords ,
} )
}
2026-02-01 16:33:17 +00:00
var mtr [ ] byte
var err error
if t . PrettyPrint {
2026-05-05 22:32:52 +00:00
mtr , err = json . MarshalIndent ( trs , "" , " " )
2026-02-01 16:33:17 +00:00
} else {
2026-05-05 22:32:52 +00:00
mtr , err = json . Marshal ( trs )
2026-02-01 16:33:17 +00:00
}
if err != nil {
return err
}
fmt . Println ( string ( mtr ) )
default :
for _ , segment := range tr . Segments {
fmt . Println ( segment . Start . String ( ) , "-" , strings . TrimSpace ( segment . Text ) )
}
2024-04-11 07:19:24 +00:00
}
return nil
}