Remove HuggingFace backend support (#8971)

* Remove HuggingFace backend support, restore other backends

- Removed backend/go/huggingface directory and all related files
- Removed pkg/langchain/huggingface.go
- Removed LCHuggingFaceBackend from pkg/model/initializers.go
- Removed huggingface backend entries from backend/index.yaml
- Updated backend/README.md to remove HuggingFace backend reference
- Restored kitten-tts, local-store, silero-vad, piper backends that were incorrectly removed

This change removes only HuggingFace backend support from LocalAI as per the P0 priority request in issue #8963, while preserving other backends (kitten-tts, local-store, silero-vad, piper).

Signed-off-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>

* Remove huggingface backend from test.yml build command

The tests-linux CI job was failing because it was trying to build the huggingface backend which no longer exists after the backend removal. This removes huggingface from the build command in test.yml.

* Apply suggestion from @mudler

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-04-21 13:27:21 +00:00 · 2026-03-13 01:09:30 +01:00 · 2026-03-13 01:09:30 +01:00 · c0351b8e6a
commit c0351b8e6a
parent ec91c477dc
10 changed files with 1 addition and 201 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -104,7 +104,7 @@ jobs:
        run: |
          make backends/transformers
          mkdir external && mv backends/transformers external/transformers
-          make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
+          make backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
      - name: Test
        run: |
          TRANSFORMER_BACKEND=$PWD/external/transformers/run.sh PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
--- a/backend/README.md
+++ b/backend/README.md
@@ -53,7 +53,6 @@ The backend system provides language-specific Dockerfiles that handle the build
 #### Go Backends (`go/`)
 - **whisper**: OpenAI Whisper speech recognition in Go with GGML cpp backend (whisper.cpp)
 - **stablediffusion-ggml**: Stable Diffusion in Go with GGML Cpp backend
-- **huggingface**: Hugging Face model integration
 - **piper**: Text-to-speech synthesis Golang with C bindings using rhaspy/piper
 - **local-store**: Vector storage backend

--- a/backend/go/huggingface/Makefile
+++ b/backend/go/huggingface/Makefile
@@ -1,12 +0,0 @@
-GOCMD=go
-
-huggingface:
-	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./
-
-package:
-	bash package.sh
-
-build: huggingface package
-
-clean:
-	rm -f huggingface
--- a/backend/go/huggingface/langchain.go
+++ b/backend/go/huggingface/langchain.go
@@ -1,64 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-	"os"
-
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/mudler/LocalAI/pkg/langchain"
-)
-
-type LLM struct {
-	base.Base
-
-	langchain *langchain.HuggingFace
-	model     string
-}
-
-func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	var err error
-	hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
-	if hfToken == "" {
-		return fmt.Errorf("no huggingface token provided")
-	}
-	llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
-	llm.model = opts.Model
-	return err
-}
-
-func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-	o := []langchain.PredictOption{
-		langchain.SetModel(llm.model),
-		langchain.SetMaxTokens(int(opts.Tokens)),
-		langchain.SetTemperature(float64(opts.Temperature)),
-		langchain.SetStopWords(opts.StopPrompts),
-	}
-	pred, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...)
-	if err != nil {
-		return "", err
-	}
-	return pred.Completion, nil
-}
-
-func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	o := []langchain.PredictOption{
-		langchain.SetModel(llm.model),
-		langchain.SetMaxTokens(int(opts.Tokens)),
-		langchain.SetTemperature(float64(opts.Temperature)),
-		langchain.SetStopWords(opts.StopPrompts),
-	}
-	go func() {
-		res, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res.Completion
-		close(results)
-	}()
-
-	return nil
-}
--- a/backend/go/huggingface/main.go
+++ b/backend/go/huggingface/main.go
@@ -1,21 +0,0 @@
-package main
-
-// Note: this is started internally by LocalAI and a server is allocated for each model
-
-import (
-	"flag"
-
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
-)
-
-var (
-	addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-	flag.Parse()
-
-	if err := grpc.StartServer(*addr, &LLM{}); err != nil {
-		panic(err)
-	}
-}
--- a/backend/go/huggingface/package.sh
+++ b/backend/go/huggingface/package.sh
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-mkdir -p $CURDIR/package
-cp -avf $CURDIR/huggingface $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
--- a/backend/go/huggingface/run.sh
+++ b/backend/go/huggingface/run.sh
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -ex
-
-CURDIR=$(dirname "$(realpath $0)")
-
-exec $CURDIR/huggingface "$@"
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -755,20 +755,6 @@
    - open-source
    - CPU
  license: MIT
-- &huggingface
-  name: "huggingface"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface"
-  mirrors:
-    - localai/localai-backends:latest-huggingface
-  icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
-  urls:
-    - https://huggingface.co/docs/hub/en/api
-  description: |
-    HuggingFace is a backend which uses the huggingface API to run models.
-  tags:
-    - LLM
-    - huggingface
-  license: MIT
 - &kitten-tts
  name: "kitten-tts"
  uri: "quay.io/go-skynet/local-ai-backends:latest-kitten-tts"
@@ -1087,21 +1073,6 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kitten-tts"
  mirrors:
    - localai/localai-backends:master-metal-darwin-arm64-kitten-tts
-- !!merge <<: *huggingface
-  name: "huggingface-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
-  mirrors:
-    - localai/localai-backends:master-huggingface
-- !!merge <<: *huggingface
-  name: "metal-huggingface"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-huggingface"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-huggingface
-- !!merge <<: *huggingface
-  name: "metal-huggingface-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-huggingface"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-huggingface
 - !!merge <<: *local-store
  name: "local-store-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
--- a/pkg/langchain/huggingface.go
+++ b/pkg/langchain/huggingface.go
@@ -1,53 +0,0 @@
-package langchain
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/tmc/langchaingo/llms"
-	"github.com/tmc/langchaingo/llms/huggingface"
-)
-
-type HuggingFace struct {
-	modelPath string
-	token     string
-}
-
-func NewHuggingFace(repoId, token string) (*HuggingFace, error) {
-	if token == "" {
-		return nil, fmt.Errorf("no huggingface token provided")
-	}
-	return &HuggingFace{
-		modelPath: repoId,
-		token:     token,
-	}, nil
-}
-
-func (s *HuggingFace) PredictHuggingFace(text string, opts ...PredictOption) (*Predict, error) {
-	po := NewPredictOptions(opts...)
-
-	// Init client
-	llm, err := huggingface.New(huggingface.WithToken(s.token))
-	if err != nil {
-		return nil, err
-	}
-
-	// Convert from LocalAI to LangChainGo format of options
-	co := []llms.CallOption{
-		llms.WithModel(po.Model),
-		llms.WithMaxTokens(po.MaxTokens),
-		llms.WithTemperature(po.Temperature),
-		llms.WithStopWords(po.StopWords),
-	}
-
-	// Call Inference API
-	ctx := context.Background()
-	completion, err := llm.Call(ctx, text, co...)
-	if err != nil {
-		return nil, err
-	}
-
-	return &Predict{
-		Completion: completion,
-	}, nil
-}
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -22,7 +22,6 @@ var Aliases = map[string]string{
 	"llama":                  LLamaCPP,
 	"embedded-store":         LocalStoreBackend,
 	"huggingface-embeddings": TransformersBackend,
-	"langchain-huggingface":  LCHuggingFaceBackend,
 	"transformers-musicgen":  TransformersBackend,
 	"sentencetransformers":   TransformersBackend,
 	"mamba":                  TransformersBackend,
@@ -39,7 +38,6 @@ var TypeAlias = map[string]string{
 const (
 	WhisperBackend             = "whisper"
 	StableDiffusionGGMLBackend = "stablediffusion-ggml"
-	LCHuggingFaceBackend       = "huggingface"

 	TransformersBackend = "transformers"
 	LocalStoreBackend   = "local-store"