diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1479aade3..61e954733 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -104,7 +104,7 @@ jobs: run: | make backends/transformers mkdir external && mv backends/transformers external/transformers - make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml + make backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml - name: Test run: | TRANSFORMER_BACKEND=$PWD/external/transformers/run.sh PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test diff --git a/backend/README.md b/backend/README.md index 1e67ec7d2..10c01d524 100644 --- a/backend/README.md +++ b/backend/README.md @@ -53,7 +53,6 @@ The backend system provides language-specific Dockerfiles that handle the build #### Go Backends (`go/`) - **whisper**: OpenAI Whisper speech recognition in Go with GGML cpp backend (whisper.cpp) - **stablediffusion-ggml**: Stable Diffusion in Go with GGML Cpp backend -- **huggingface**: Hugging Face model integration - **piper**: Text-to-speech synthesis Golang with C bindings using rhaspy/piper - **local-store**: Vector storage backend diff --git a/backend/go/huggingface/Makefile b/backend/go/huggingface/Makefile deleted file mode 100644 index fddd61c00..000000000 --- a/backend/go/huggingface/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -GOCMD=go - -huggingface: - CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./ - -package: - bash package.sh - -build: huggingface package - -clean: - rm -f huggingface \ No newline at end of file diff --git a/backend/go/huggingface/langchain.go b/backend/go/huggingface/langchain.go deleted file mode 100644 index a18c6c876..000000000 --- a/backend/go/huggingface/langchain.go +++ /dev/null @@ -1,64 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - "os" - - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - "github.com/mudler/LocalAI/pkg/langchain" -) - -type LLM struct { - base.Base - - langchain *langchain.HuggingFace - model string -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - var err error - hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN") - if hfToken == "" { - return fmt.Errorf("no huggingface token provided") - } - llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken) - llm.model = opts.Model - return err -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - o := []langchain.PredictOption{ - langchain.SetModel(llm.model), - langchain.SetMaxTokens(int(opts.Tokens)), - langchain.SetTemperature(float64(opts.Temperature)), - langchain.SetStopWords(opts.StopPrompts), - } - pred, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...) - if err != nil { - return "", err - } - return pred.Completion, nil -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - o := []langchain.PredictOption{ - langchain.SetModel(llm.model), - langchain.SetMaxTokens(int(opts.Tokens)), - langchain.SetTemperature(float64(opts.Temperature)), - langchain.SetStopWords(opts.StopPrompts), - } - go func() { - res, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res.Completion - close(results) - }() - - return nil -} diff --git a/backend/go/huggingface/main.go b/backend/go/huggingface/main.go deleted file mode 100644 index acf440879..000000000 --- a/backend/go/huggingface/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &LLM{}); err != nil { - panic(err) - } -} diff --git a/backend/go/huggingface/package.sh b/backend/go/huggingface/package.sh deleted file mode 100755 index 62fc59c6a..000000000 --- a/backend/go/huggingface/package.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# Script to copy the appropriate libraries based on architecture -# This script is used in the final stage of the Dockerfile - -set -e - -CURDIR=$(dirname "$(realpath $0)") - -mkdir -p $CURDIR/package -cp -avf $CURDIR/huggingface $CURDIR/package/ -cp -rfv $CURDIR/run.sh $CURDIR/package/ \ No newline at end of file diff --git a/backend/go/huggingface/run.sh b/backend/go/huggingface/run.sh deleted file mode 100755 index 08972b5d2..000000000 --- a/backend/go/huggingface/run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -ex - -CURDIR=$(dirname "$(realpath $0)") - -exec $CURDIR/huggingface "$@" \ No newline at end of file diff --git a/backend/index.yaml b/backend/index.yaml index db095659c..619c17a82 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -755,20 +755,6 @@ - open-source - CPU license: MIT -- &huggingface - name: "huggingface" - uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface" - mirrors: - - localai/localai-backends:latest-huggingface - icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg - urls: - - https://huggingface.co/docs/hub/en/api - description: | - HuggingFace is a backend which uses the huggingface API to run models. - tags: - - LLM - - huggingface - license: MIT - &kitten-tts name: "kitten-tts" uri: "quay.io/go-skynet/local-ai-backends:latest-kitten-tts" @@ -1087,21 +1073,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kitten-tts" mirrors: - localai/localai-backends:master-metal-darwin-arm64-kitten-tts -- !!merge <<: *huggingface - name: "huggingface-development" - uri: "quay.io/go-skynet/local-ai-backends:master-huggingface" - mirrors: - - localai/localai-backends:master-huggingface -- !!merge <<: *huggingface - name: "metal-huggingface" - uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-huggingface" - mirrors: - - localai/localai-backends:latest-metal-darwin-arm64-huggingface -- !!merge <<: *huggingface - name: "metal-huggingface-development" - uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-huggingface" - mirrors: - - localai/localai-backends:master-metal-darwin-arm64-huggingface - !!merge <<: *local-store name: "local-store-development" uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store" diff --git a/pkg/langchain/huggingface.go b/pkg/langchain/huggingface.go deleted file mode 100644 index 9be5ee9d5..000000000 --- a/pkg/langchain/huggingface.go +++ /dev/null @@ -1,53 +0,0 @@ -package langchain - -import ( - "context" - "fmt" - - "github.com/tmc/langchaingo/llms" - "github.com/tmc/langchaingo/llms/huggingface" -) - -type HuggingFace struct { - modelPath string - token string -} - -func NewHuggingFace(repoId, token string) (*HuggingFace, error) { - if token == "" { - return nil, fmt.Errorf("no huggingface token provided") - } - return &HuggingFace{ - modelPath: repoId, - token: token, - }, nil -} - -func (s *HuggingFace) PredictHuggingFace(text string, opts ...PredictOption) (*Predict, error) { - po := NewPredictOptions(opts...) - - // Init client - llm, err := huggingface.New(huggingface.WithToken(s.token)) - if err != nil { - return nil, err - } - - // Convert from LocalAI to LangChainGo format of options - co := []llms.CallOption{ - llms.WithModel(po.Model), - llms.WithMaxTokens(po.MaxTokens), - llms.WithTemperature(po.Temperature), - llms.WithStopWords(po.StopWords), - } - - // Call Inference API - ctx := context.Background() - completion, err := llm.Call(ctx, text, co...) - if err != nil { - return nil, err - } - - return &Predict{ - Completion: completion, - }, nil -} diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 45bd3b6af..d7b7e2790 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -22,7 +22,6 @@ var Aliases = map[string]string{ "llama": LLamaCPP, "embedded-store": LocalStoreBackend, "huggingface-embeddings": TransformersBackend, - "langchain-huggingface": LCHuggingFaceBackend, "transformers-musicgen": TransformersBackend, "sentencetransformers": TransformersBackend, "mamba": TransformersBackend, @@ -39,7 +38,6 @@ var TypeAlias = map[string]string{ const ( WhisperBackend = "whisper" StableDiffusionGGMLBackend = "stablediffusion-ggml" - LCHuggingFaceBackend = "huggingface" TransformersBackend = "transformers" LocalStoreBackend = "local-store"