chore(refactor): use interface (#9226)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2026-04-04 17:29:37 +02:00 committed by GitHub
parent 557d0f0f04
commit 6f304d1201
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 24 additions and 14 deletions

View file

@@ -512,11 +512,9 @@ func (s *backendSupervisor) stopBackend(backend string) {
// Network I/O outside the lock
client := grpc.NewClientWithToken(bp.addr, false, nil, false, s.cmd.RegistrationToken)
if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
xlog.Debug("Calling Free() before stopping backend", "backend", backend)
if err := freeFunc.Free(context.Background()); err != nil {
xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
}
xlog.Debug("Calling Free() before stopping backend", "backend", backend)
if err := client.Free(context.Background()); err != nil {
xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
}
xlog.Info("Stopping backend process", "backend", backend, "addr", bp.addr)
@@ -774,10 +772,8 @@ func (s *backendSupervisor) subscribeLifecycleEvents() {
if targetAddr != "" {
// Best-effort gRPC Free()
client := grpc.NewClientWithToken(targetAddr, false, nil, false, s.cmd.RegistrationToken)
if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
if err := freeFunc.Free(context.Background()); err != nil {
xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
}
if err := client.Free(context.Background()); err != nil {
xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
}
}

View file

@@ -231,6 +231,9 @@ func (c *fakeBackendClient) QuantizationProgress(_ context.Context, _ *pb.Quanti
func (c *fakeBackendClient) StopQuantization(_ context.Context, _ *pb.QuantizationStopRequest, _ ...ggrpc.CallOption) (*pb.Result, error) {
return nil, nil
}
// Free is a no-op stub satisfying the new Free(ctx) method added to the
// backend interface, so tests using fakeBackendClient keep compiling; it
// always reports success.
func (c *fakeBackendClient) Free(_ context.Context) error {
return nil
}
// --- fakeBackendClientFactory ---

View file

@@ -175,6 +175,10 @@ func (f *fakeGRPCBackend) StopQuantization(_ context.Context, _ *pb.Quantization
return &pb.Result{}, nil
}
// Free is a no-op test double for the Free(ctx) interface method; it
// discards the context and always returns nil (success).
func (f *fakeGRPCBackend) Free(_ context.Context) error {
return nil
}
// --- Tests ---
var _ = Describe("InFlightTrackingClient", func() {

View file

@@ -85,4 +85,7 @@ type Backend interface {
StartQuantization(ctx context.Context, in *pb.QuantizationRequest, opts ...grpc.CallOption) (*pb.QuantizationJobResult, error)
QuantizationProgress(ctx context.Context, in *pb.QuantizationProgressRequest, f func(update *pb.QuantizationProgressUpdate), opts ...grpc.CallOption) error
StopQuantization(ctx context.Context, in *pb.QuantizationStopRequest, opts ...grpc.CallOption) (*pb.Result, error)
// Free releases GPU/model resources (e.g. VRAM) without stopping the process.
Free(ctx context.Context) error
}

View file

@@ -163,6 +163,11 @@ func (e *embedBackend) StopQuantization(ctx context.Context, in *pb.Quantization
return e.s.StopQuantization(ctx, in)
}
// Free forwards the resource-release request to the embedded server via its
// gRPC Free call, discarding the result payload and surfacing only the error.
func (e *embedBackend) Free(ctx context.Context) error {
	if _, freeErr := e.s.Free(ctx, &pb.HealthMessage{}); freeErr != nil {
		return freeErr
	}
	return nil
}
var _ pb.Backend_FineTuneProgressServer = new(embedBackendFineTuneProgressStream)
type embedBackendFineTuneProgressStream struct {

View file

@@ -1,6 +1,7 @@
package model
import (
"context"
"errors"
"fmt"
"os"
@@ -52,11 +53,9 @@ func (ml *ModelLoader) deleteProcess(s string) error {
}
// Free GPU resources before stopping the process to ensure VRAM is released
if freeFunc, ok := model.GRPC(false, ml.wd).(interface{ Free() error }); ok {
xlog.Debug("Calling Free() to release GPU resources", "model", s)
if err := freeFunc.Free(); err != nil {
xlog.Warn("Error freeing GPU resources", "error", err, "model", s)
}
xlog.Debug("Calling Free() to release GPU resources", "model", s)
if err := model.GRPC(false, ml.wd).Free(context.Background()); err != nil {
xlog.Warn("Error freeing GPU resources", "error", err, "model", s)
}
process := model.Process()