mirror of
https://github.com/mudler/LocalAI
synced 2026-04-21 13:27:21 +00:00
chore(refactor): use interface (#9226)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
557d0f0f04
commit
6f304d1201
6 changed files with 24 additions and 14 deletions
|
|
@ -512,11 +512,9 @@ func (s *backendSupervisor) stopBackend(backend string) {
|
|||
|
||||
// Network I/O outside the lock
|
||||
client := grpc.NewClientWithToken(bp.addr, false, nil, false, s.cmd.RegistrationToken)
|
||||
if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
|
||||
xlog.Debug("Calling Free() before stopping backend", "backend", backend)
|
||||
if err := freeFunc.Free(context.Background()); err != nil {
|
||||
xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
|
||||
}
|
||||
xlog.Debug("Calling Free() before stopping backend", "backend", backend)
|
||||
if err := client.Free(context.Background()); err != nil {
|
||||
xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
|
||||
}
|
||||
|
||||
xlog.Info("Stopping backend process", "backend", backend, "addr", bp.addr)
|
||||
|
|
@ -774,10 +772,8 @@ func (s *backendSupervisor) subscribeLifecycleEvents() {
|
|||
if targetAddr != "" {
|
||||
// Best-effort gRPC Free()
|
||||
client := grpc.NewClientWithToken(targetAddr, false, nil, false, s.cmd.RegistrationToken)
|
||||
if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
|
||||
if err := freeFunc.Free(context.Background()); err != nil {
|
||||
xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
|
||||
}
|
||||
if err := client.Free(context.Background()); err != nil {
|
||||
xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -231,6 +231,9 @@ func (c *fakeBackendClient) QuantizationProgress(_ context.Context, _ *pb.Quanti
|
|||
func (c *fakeBackendClient) StopQuantization(_ context.Context, _ *pb.QuantizationStopRequest, _ ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (c *fakeBackendClient) Free(_ context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- fakeBackendClientFactory ---
|
||||
|
||||
|
|
|
|||
|
|
@ -175,6 +175,10 @@ func (f *fakeGRPCBackend) StopQuantization(_ context.Context, _ *pb.Quantization
|
|||
return &pb.Result{}, nil
|
||||
}
|
||||
|
||||
func (f *fakeGRPCBackend) Free(_ context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- Tests ---
|
||||
|
||||
var _ = Describe("InFlightTrackingClient", func() {
|
||||
|
|
|
|||
|
|
@ -85,4 +85,7 @@ type Backend interface {
|
|||
StartQuantization(ctx context.Context, in *pb.QuantizationRequest, opts ...grpc.CallOption) (*pb.QuantizationJobResult, error)
|
||||
QuantizationProgress(ctx context.Context, in *pb.QuantizationProgressRequest, f func(update *pb.QuantizationProgressUpdate), opts ...grpc.CallOption) error
|
||||
StopQuantization(ctx context.Context, in *pb.QuantizationStopRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
|
||||
// Free releases GPU/model resources (e.g. VRAM) without stopping the process.
|
||||
Free(ctx context.Context) error
|
||||
}
|
||||
|
|
|
|||
|
|
@ -163,6 +163,11 @@ func (e *embedBackend) StopQuantization(ctx context.Context, in *pb.Quantization
|
|||
return e.s.StopQuantization(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) Free(ctx context.Context) error {
|
||||
_, err := e.s.Free(ctx, &pb.HealthMessage{})
|
||||
return err
|
||||
}
|
||||
|
||||
var _ pb.Backend_FineTuneProgressServer = new(embedBackendFineTuneProgressStream)
|
||||
|
||||
type embedBackendFineTuneProgressStream struct {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package model
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
|
|
@ -52,11 +53,9 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
|||
}
|
||||
|
||||
// Free GPU resources before stopping the process to ensure VRAM is released
|
||||
if freeFunc, ok := model.GRPC(false, ml.wd).(interface{ Free() error }); ok {
|
||||
xlog.Debug("Calling Free() to release GPU resources", "model", s)
|
||||
if err := freeFunc.Free(); err != nil {
|
||||
xlog.Warn("Error freeing GPU resources", "error", err, "model", s)
|
||||
}
|
||||
xlog.Debug("Calling Free() to release GPU resources", "model", s)
|
||||
if err := model.GRPC(false, ml.wd).Free(context.Background()); err != nil {
|
||||
xlog.Warn("Error freeing GPU resources", "error", err, "model", s)
|
||||
}
|
||||
|
||||
process := model.Process()
|
||||
|
|
|
|||
Loading…
Reference in a new issue