feat: refactor build process, drop embedded backends (#5875)

* feat: split remaining backends and drop embedded backends

- Drop the silero-vad, huggingface, and stores backends from the
  embedded binaries (see the sketch after this list for how they are
  installed at runtime instead)
- Refactor the Makefile and Dockerfile to avoid building the gRPC
  backends
- Drop the Go code that was used to embed backends
- Simplify the build process by using goreleaser
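
With the embedded backends gone, those backends are installed at
runtime as external backend images instead. A minimal sketch, assuming
the backends are published to the backend gallery under these names
(the ocifile:// form is the one used by the new Makefile targets in
this change):

  # install a previously embedded backend by gallery name (assumed to be published)
  local-ai backends install silero-vad
  # or install from a locally exported OCI tarball
  local-ai backends install "ocifile://$PWD/backend-images/silero-vad.tar"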

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(gallery): be specific with llama-cpp backend templates

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(docs): update

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(ci): minor fixes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: drop all ffmpeg references

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: run protogen-go

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Always enable p2p mode

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Update goreleaser file

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(stores): do not always load

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix linting issues

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* macOS fixup

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Author: Ettore Di Giacinto, 2025-07-22 16:31:04 +02:00 (committed by GitHub)
parent e29b2c3aff
commit 98e5291afc
118 changed files with 631 additions and 1339 deletions


@ -2,9 +2,6 @@
cd /workspace
# Grab the pre-stashed backend assets to avoid build issues
cp -r /build/backend-assets /workspace/backend-assets
# Ensures generated source files are present upon load
make prepare


@ -4,9 +4,6 @@ services:
context: ..
dockerfile: Dockerfile
target: devcontainer
args:
- FFMPEG=true
- GO_TAGS=p2p tts
env_file:
- ../.env
ports:

.env (7 changes)

@ -41,13 +41,6 @@
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
## Enable go tags, available: p2p, tts
## p2p: enable distributed inferencing
## tts: enables text-to-speech with go-piper
## (requires REBUILD=true)
#
# GO_TAGS=p2p
## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images


@ -43,7 +43,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -55,7 +55,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -67,7 +67,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -79,7 +79,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -91,7 +91,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -104,7 +104,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -116,7 +116,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -128,7 +128,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -140,7 +140,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -152,7 +152,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -165,7 +165,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -177,7 +177,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -189,7 +189,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -201,7 +201,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -213,7 +213,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -226,7 +226,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -238,7 +238,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -250,7 +250,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -262,7 +262,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -274,7 +274,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -287,7 +287,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-rerankers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -299,7 +299,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -311,7 +311,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-vllm'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -323,7 +323,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-transformers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -335,7 +335,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-diffusers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -348,7 +348,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-kokoro'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -360,7 +360,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -372,7 +372,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-coqui'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -384,7 +384,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-bark'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -397,7 +397,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -409,7 +409,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -421,7 +421,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -433,7 +433,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -445,7 +445,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -457,7 +457,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -469,7 +469,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -481,7 +481,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -493,7 +493,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-diffusers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -506,7 +506,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -518,7 +518,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -530,7 +530,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -542,7 +542,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -554,7 +554,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -566,7 +566,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -578,7 +578,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -590,7 +590,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -603,7 +603,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-piper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -616,7 +616,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-bark-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -628,7 +628,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-cpu-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -652,7 +652,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -665,7 +665,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-cpu-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -677,7 +677,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -689,7 +689,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -701,7 +701,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -713,7 +713,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -725,7 +725,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -749,8 +749,8 @@ jobs:
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -762,7 +762,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -774,7 +774,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -786,7 +786,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -798,7 +798,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -810,7 +810,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@ -842,6 +842,45 @@ jobs:
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
#silero-vad
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-silero-vad'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "silero-vad"
dockerfile: "./backend/Dockerfile.go"
context: "./"
# local-store
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-local-store'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "local-store"
dockerfile: "./backend/Dockerfile.go"
context: "./"
# huggingface
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-huggingface'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "huggingface"
dockerfile: "./backend/Dockerfile.go"
context: "./"
llama-cpp-darwin:
runs-on: macOS-14
strategy:
@ -866,7 +905,7 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
make build-api
make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
@ -954,7 +993,7 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
make build-api
make build
export PLATFORMARCH=darwin/amd64
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar

.github/workflows/build-test.yaml (new file, 23 changes)

@ -0,0 +1,23 @@
name: Build test
on:
push:
branches:
- master
pull_request:
jobs:
build-test:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: 1.23
- name: Run GoReleaser
run: |
make dev-dist


@ -31,7 +31,7 @@ jobs:
make protogen-go
- name: Build api
run: |
CGO_ENABLED=0 make build-api
CGO_ENABLED=0 make build
- name: rm
uses: appleboy/ssh-action@v1.2.2
with:


@ -14,7 +14,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -40,8 +39,7 @@ jobs:
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
ffmpeg: 'true'
tag-suffix: '-gpu-nvidia-cuda12'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
@ -49,7 +47,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
@ -59,15 +56,13 @@ jobs:
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
tag-suffix: 'sycl-f16'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
tag-suffix: '-vulkan-core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"


@ -18,7 +18,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -40,7 +39,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas'
ffmpeg: 'true'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
@ -52,7 +50,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -76,7 +73,6 @@ jobs:
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: 'true'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
@ -88,7 +84,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda11'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
@ -100,7 +95,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda12'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
@ -110,7 +104,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-vulkan'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
@ -122,7 +115,6 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f16'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f16"
@ -132,7 +124,6 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f32'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f32"
@ -142,7 +133,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -167,7 +157,6 @@ jobs:
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'
ffmpeg: 'true'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"


@ -37,10 +37,6 @@ on:
description: 'Tag suffix'
default: ''
type: string
ffmpeg:
description: 'FFMPEG'
default: ''
type: string
skip-drivers:
description: 'Skip drivers by default'
default: 'false'
@ -236,7 +232,6 @@ jobs:
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
@ -264,7 +259,6 @@ jobs:
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target


@ -96,7 +96,7 @@ jobs:
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.1


@ -1,399 +1,26 @@
name: Build and Release
name: goreleaser
on:
push:
branches:
- master
tags:
- 'v*'
pull_request:
env:
GRPC_VERSION: v1.65.0
permissions:
contents: write
concurrency:
group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
# TODO: temporary disable linux-arm64 build
# build-linux-arm:
# runs-on: ubuntu-24.04-arm
# steps:
# - name: Free Disk Space (Ubuntu)
# uses: jlumbroso/free-disk-space@main
# with:
# # this might remove tools that are actually needed,
# # if set to "true" but frees about 6 GB
# tool-cache: true
# # all of these default to true, but feel free to set to
# # "false" if necessary for your workflow
# android: true
# dotnet: true
# haskell: true
# large-packages: true
# docker-images: true
# swap-storage: true
# - name: Release space from worker
# run: |
# echo "Listing top largest packages"
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
# head -n 30 <<< "${pkgs}"
# echo
# df -h
# echo
# sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
# sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
# sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
# sudo rm -rf /usr/local/lib/android
# sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
# sudo rm -rf /usr/share/dotnet
# sudo apt-get remove -y '^mono-.*' || true
# sudo apt-get remove -y '^ghc-.*' || true
# sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
# sudo apt-get remove -y 'php.*' || true
# sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
# sudo apt-get remove -y '^google-.*' || true
# sudo apt-get remove -y azure-cli || true
# sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
# sudo apt-get remove -y '^gfortran-.*' || true
# sudo apt-get remove -y microsoft-edge-stable || true
# sudo apt-get remove -y firefox || true
# sudo apt-get remove -y powershell || true
# sudo apt-get remove -y r-base-core || true
# sudo apt-get autoremove -y
# sudo apt-get clean
# echo
# echo "Listing top largest packages"
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
# head -n 30 <<< "${pkgs}"
# echo
# sudo rm -rfv build || true
# sudo rm -rf /usr/share/dotnet || true
# sudo rm -rf /opt/ghc || true
# sudo rm -rf "/usr/local/share/boost" || true
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
# df -h
# - name: Force Install GIT latest
# run: |
# sudo apt-get update \
# && sudo apt-get install -y software-properties-common \
# && sudo apt-get update \
# && sudo add-apt-repository -y ppa:git-core/ppa \
# && sudo apt-get update \
# && sudo apt-get install -y git
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - uses: actions/setup-go@v5
# with:
# go-version: '1.21.x'
# cache: false
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
# make install-go-tools
# - name: Install CUDA Dependencies
# run: |
# curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
# sudo dpkg -i cuda-keyring_1.1-1_all.deb
# sudo apt-get update
# sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
# env:
# CUDA_VERSION: 12-5
# - name: Cache grpc
# id: cache-grpc
# uses: actions/cache@v4
# with:
# path: grpc
# key: ${{ runner.os }}-grpc-arm64-${{ env.GRPC_VERSION }}
# - name: Build grpc
# if: steps.cache-grpc.outputs.cache-hit != 'true'
# run: |
# git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
# cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
# cd cmake/build && cmake -DgRPC_INSTALL=ON \
# -DgRPC_BUILD_TESTS=OFF \
# ../.. && sudo make --jobs 5 --output-sync=target
# - name: Install gRPC
# run: |
# cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
# # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
# - name: Build
# id: build
# run: |
# go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
# go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
# export PATH=$PATH:$GOPATH/bin
# export PATH=/usr/local/cuda/bin:$PATH
# sudo cp /lib64/ld-linux-aarch64.so.1 ld.so
# BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/aarch64-linux-gnu/libdl.so.2 /usr/lib/aarch64-linux-gnu/librt.so.1 /usr/lib/aarch64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
# make -j4 dist
# - uses: actions/upload-artifact@v4
# with:
# name: LocalAI-linux-arm64
# path: release/
# - name: Release
# uses: softprops/action-gh-release@v2
# if: startsWith(github.ref, 'refs/tags/')
# with:
# files: |
# release/*
# - name: Setup tmate session if tests fail
# if: ${{ failure() }}
# uses: mxschmitt/action-tmate@v3.22
# with:
# detached: true
# connect-timeout-seconds: 180
# limit-access-to-actor: true
build-linux:
goreleaser:
runs-on: ubuntu-latest
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Release space from worker
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y firefox || true
sudo apt-get remove -y powershell || true
sudo apt-get remove -y r-base-core || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Clone
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
make install-go-tools
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt update
sudo apt install -y intel-basekit
- name: Install CUDA Dependencies
run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
go-version: 1.23
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v6
with:
version: v2.11.0
args: release --clean
env:
CUDA_VERSION: 12-5
- name: "Install Hipblas"
env:
ROCM_VERSION: "6.1"
AMDGPU_VERSION: "6.1"
run: |
set -ex
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
sudo apt update
wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
sudo apt update
sudo amdgpu-install --usecase=rocm
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo ldconfig
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
with:
path: grpc
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
run: |
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
- name: Build
id: build
run: |
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
make -j4 dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-linux
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-x86_64:
runs-on: macos-13
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc
make install-go-tools
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-x86_64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-arm64:
runs-on: macos-14
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc libomp llvm
make install-go-tools
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export CC=/opt/homebrew/opt/llvm/bin/clang
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-arm64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


@ -75,7 +75,6 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Dependencies
run: |
@ -103,7 +102,7 @@ jobs:
make -C backend/python/transformers
make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
env:
CUDA_VERSION: 12-4
- name: Test
@ -164,11 +163,10 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Test
run: |
PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
@ -199,11 +197,10 @@ jobs:
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
go install github.com/GeertJohan/go.rice/rice@latest
- name: Build llama-cpp-darwin
run: |
make protogen-go
make build-api
make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar

.gitignore (2 changes)

@ -10,6 +10,8 @@ prepare-sources
/backend/cpp/llama-*
!backend/cpp/llama-cpp
/backends
/backend-images
/result.yaml
*.log

.goreleaser.yaml (new file, 33 changes)

@ -0,0 +1,33 @@
version: 2
before:
hooks:
- make protogen-go
- go mod tidy
dist: release
source:
enabled: true
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
builds:
-
env:
- CGO_ENABLED=0
ldflags:
- -s -w
- -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}"
- -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}"
goos:
- linux
- darwin
#- windows
goarch:
- amd64
- arm64
archives:
- formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone
name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}
checksum:
name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt'
snapshot:
version_template: "{{ .Tag }}-next"
changelog:
use: github-native
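
With this configuration in place, a snapshot build can be produced
locally without publishing anything. A sketch of how the new Makefile
targets and the goreleaser workflow drive it (flags taken from this
change; output lands in release/ per the "dist: release" setting above):

  # snapshot build (what "make dev-dist" wraps)
  goreleaser build --snapshot --clean
  # versioned build (what "make dist" wraps)
  goreleaser build --clean
  # tagged release, as run by the goreleaser workflow with GITHUB_TOKEN set
  goreleaser release --clean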

.vscode/launch.json (2 changes)

@ -26,7 +26,7 @@
"LOCALAI_P2P": "true",
"LOCALAI_FEDERATED": "true"
},
"buildFlags": ["-tags", "p2p tts", "-v"],
"buildFlags": ["-tags", "", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
}


@ -142,10 +142,9 @@ EOT
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers and rice
# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
go install github.com/GeertJohan/go.rice/rice@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
@ -194,7 +193,7 @@ RUN apt-get update && \
FROM build-requirements AS builder-base
ARG GO_TAGS="p2p"
ARG GO_TAGS=""
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
@ -249,8 +248,7 @@ COPY ./pkg/utils ./pkg/utils
COPY ./pkg/langchain ./pkg/langchain
RUN ls -l ./
RUN make backend-assets
RUN make grpcs
RUN make protogen-go
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.


@ -1,5 +0,0 @@
VERSION 0.7
build:
FROM DOCKERFILE -f Dockerfile .
SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai

Makefile (221 changes)

@ -3,9 +3,7 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
ONNX_OS?=linux
GORELEASER?=
export BUILD_TYPE?=
@ -35,77 +33,33 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
UPX?=
# check if upx exists
ifeq (, $(shell which upx))
UPX=
else
UPX=$(shell which upx)
endif
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
# Detect if we are running on arm64
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=aarch64
endif
ifeq ($(OS),Darwin)
ONNX_OS=osx
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=arm64
else ifneq (,$(findstring arm64,$(shell uname -m)))
ONNX_ARCH=arm64
else
ONNX_ARCH=x86_64
endif
ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
endif
endif
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
# check if goreleaser exists
ifeq (, $(shell which goreleaser))
GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s --
else
GORELEASER=$(shell which goreleaser)
endif
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/...
# If empty, then we build all
ifeq ($(GRPC_BACKENDS),)
GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
endif
ifeq ($(BUILD_API_ONLY),true)
GRPC_BACKENDS=
endif
.PHONY: all test build vendor
all: help
sources/onnxruntime:
mkdir -p sources/onnxruntime
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
ifeq ($(OS),Darwin)
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
else
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
endif
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
@ -116,58 +70,33 @@ clean: ## Remove build related file
rm -f prepare
rm -rf $(BINARY_NAME)
rm -rf release/
rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) protogen-clean
rmdir pkg/grpc/proto || true
clean-tests:
rm -rf test-models
rm -rf test-dir
rm -rf core/http/backend-assets
clean-dc: clean
cp -r /build/backend-assets /workspace/backend-assets
## Install Go tools
install-go-tools:
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install github.com/GeertJohan/go.rice/rice@latest
## Build:
build: backend-assets grpcs install-go-tools ## Build the project
build: protogen-go install-go-tools ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
endif
rm -rf $(BINARY_NAME) || true
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
rice append --exec $(BINARY_NAME)
build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
backend-assets/lib:
mkdir -p backend-assets/lib
dev-dist:
$(GORELEASER) build --snapshot --clean
dist:
GO_TAGS="p2p" $(MAKE) build
GO_TAGS="p2p" STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
else
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif
$(GORELEASER) build --clean
osx-signed: build
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
@ -185,8 +114,7 @@ test-models/testmodel.ggml:
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
cp tests/models_fixtures/* test-models
prepare-test: grpcs
cp -rf backend-assets core/http
prepare-test: protogen-go
cp tests/models_fixtures/* test-models
########################################################
@ -194,7 +122,7 @@ prepare-test: grpcs
########################################################
## Test targets
test: test-models/testmodel.ggml grpcs
test: test-models/testmodel.ggml protogen-go
@echo 'Running tests'
export GO_TAGS="debug"
$(MAKE) prepare-test
@ -204,18 +132,27 @@ test: test-models/testmodel.ggml grpcs
$(MAKE) test-tts
$(MAKE) test-stablediffusion
backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build-api
backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
backends/piper: docker-build-piper docker-save-piper build-api
backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build-api
backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
backends/whisper: docker-build-whisper docker-save-whisper build-api
backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
backends/local-store: docker-build-local-store docker-save-local-store build
./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
backends/huggingface: docker-build-huggingface docker-save-huggingface build
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
########################################################
## AIO tests
########################################################
@ -243,7 +180,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@ -275,9 +212,7 @@ test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-stores: backend-assets/grpc/local-store
mkdir -p tests/integration/backend-assets/grpc
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
test-stores:
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
test-container:
@ -310,10 +245,42 @@ protogen: protogen-go protogen-python
.PHONY: protogen-clean
protogen-clean: protogen-go-clean protogen-python-clean
protoc:
@OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
ARCH_NAME=$$(uname -m); \
if [ "$$OS_NAME" = "darwin" ]; then \
if [ "$$ARCH_NAME" = "arm64" ]; then \
FILE=protoc-31.1-osx-aarch_64.zip; \
elif [ "$$ARCH_NAME" = "x86_64" ]; then \
FILE=protoc-31.1-osx-x86_64.zip; \
else \
echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \
fi; \
elif [ "$$OS_NAME" = "linux" ]; then \
if [ "$$ARCH_NAME" = "x86_64" ]; then \
FILE=protoc-31.1-linux-x86_64.zip; \
elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \
FILE=protoc-31.1-linux-aarch_64.zip; \
elif [ "$$ARCH_NAME" = "ppc64le" ]; then \
FILE=protoc-31.1-linux-ppcle_64.zip; \
elif [ "$$ARCH_NAME" = "s390x" ]; then \
FILE=protoc-31.1-linux-s390_64.zip; \
elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \
FILE=protoc-31.1-linux-x86_32.zip; \
else \
echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \
fi; \
else \
echo "Unsupported OS: $$OS_NAME"; exit 1; \
fi; \
URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \
curl -L -s $$URL -o protoc.zip && \
unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip
.PHONY: protogen-go
protogen-go: install-go-tools
protogen-go: protoc install-go-tools
mkdir -p pkg/grpc/proto
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto
.PHONY: protogen-go-clean
@ -407,19 +374,6 @@ vllm-protogen:
vllm-protogen-clean:
$(MAKE) -C backend/python/vllm protogen-clean
## GRPC
# Note: it is duplicated in the Dockerfile
prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/chatterbox
$(MAKE) -C backend/python/faster-whisper
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/kokoro
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers
@ -433,37 +387,6 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/chatterbox test
$(MAKE) -C backend/python/vllm test
backend-assets:
mkdir -p backend-assets
ifeq ($(BUILD_API_ONLY),true)
touch backend-assets/keep
endif
backend-assets/grpc:
mkdir -p backend-assets/grpc
backend-assets/grpc/huggingface: protogen-go backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/huggingface
endif
backend-assets/grpc/silero-vad: protogen-go backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/silero-vad
endif
backend-assets/grpc/local-store: backend-assets/grpc protogen-go
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/local-store
endif
grpcs: protogen-go $(GRPC_BACKENDS)
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
IMAGE_TYPE?=core
@ -506,7 +429,6 @@ docker-image-intel:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
docker-image-intel-xpu:
@ -515,7 +437,6 @@ docker-image-intel-xpu:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
########################################################
@ -534,6 +455,24 @@ docker-build-bark-cpp:
docker-build-piper:
docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper .
docker-build-local-store:
docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store .
docker-build-huggingface:
docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface .
docker-save-huggingface: backend-images
docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
docker-save-local-store: backend-images
docker save local-ai-backend:local-store -o backend-images/local-store.tar
docker-build-silero-vad:
docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad .
docker-save-silero-vad: backend-images
docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
docker-save-piper: backend-images
docker save local-ai-backend:piper -o backend-images/piper.tar
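These targets build each split-out Go backend as a standalone container image and export it as a tarball under `backend-images/`. A minimal local workflow sketch, assuming Docker is available (the load step on a second host is illustrative):
```bash
# Build the silero-vad backend image and export it as a tarball
make docker-build-silero-vad
make docker-save-silero-vad

# The tarball can later be imported with plain Docker on another machine
docker load -i backend-images/silero-vad.tar
```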

View file

@ -1,15 +0,0 @@
package main
import (
rice "github.com/GeertJohan/go.rice"
)
var backendAssets *rice.Box
func init() {
var err error
backendAssets, err = rice.FindBox("backend-assets")
if err != nil {
panic(err)
}
}

View file

@ -44,7 +44,7 @@ fi
if [ "$(uname)" == "Darwin" ]; then
DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
else
LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
# If there is a lib/ld.so, use it

View file

@ -0,0 +1,9 @@
GOCMD=go
huggingface:
CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./
package:
bash package.sh
build: huggingface package

View file

@ -0,0 +1,12 @@
#!/bin/bash
# Script to copy the appropriate libraries based on architecture
# This script is used in the final stage of the Dockerfile
set -e
CURDIR=$(dirname "$(realpath $0)")
mkdir -p $CURDIR/package
cp -avrf $CURDIR/huggingface $CURDIR/package/
cp -rfv $CURDIR/run.sh $CURDIR/package/

6
backend/go/huggingface/run.sh Executable file
View file

@ -0,0 +1,6 @@
#!/bin/bash
set -ex
CURDIR=$(dirname "$(realpath $0)")
exec $CURDIR/huggingface "$@"

View file

@ -0,0 +1,9 @@
GOCMD=go
local-store:
CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o local-store ./
package:
bash package.sh
build: local-store package

View file

@ -0,0 +1,12 @@
#!/bin/bash
# Script to copy the appropriate libraries based on architecture
# This script is used in the final stage of the Dockerfile
set -e
CURDIR=$(dirname "$(realpath $0)")
mkdir -p $CURDIR/package
cp -avrf $CURDIR/local-store $CURDIR/package/
cp -rfv $CURDIR/run.sh $CURDIR/package/

6
backend/go/local-store/run.sh Executable file
View file

@ -0,0 +1,6 @@
#!/bin/bash
set -ex
CURDIR=$(dirname "$(realpath $0)")
exec $CURDIR/local-store "$@"

View file

@ -4,6 +4,7 @@ package main
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"container/heap"
"errors"
"fmt"
"math"
"slices"
@ -99,6 +100,9 @@ func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
}
func (s *Store) Load(opts *pb.ModelOptions) error {
if opts.Model != "" {
return errors.New("not implemented")
}
return nil
}
@ -315,7 +319,7 @@ func isNormalized(k []float32) bool {
for _, v := range k {
v64 := float64(v)
sum += v64*v64
sum += v64 * v64
}
s := math.Sqrt(sum)

View file

@ -0,0 +1,47 @@
CURRENT_DIR=$(abspath ./)
GOCMD=go
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
ONNX_OS?=linux
# Detect if we are running on arm64
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=aarch64
endif
ifeq ($(OS),Darwin)
ONNX_OS=osx
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=arm64
else ifneq (,$(findstring arm64,$(shell uname -m)))
ONNX_ARCH=arm64
else
ONNX_ARCH=x86_64
endif
endif
sources/onnxruntime:
mkdir -p sources/onnxruntime
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
backend-assets/lib/libonnxruntime.so.1: sources/onnxruntime
mkdir -p backend-assets/lib
cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/
ifeq ($(OS),Darwin)
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
else
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
endif
silero-vad: backend-assets/lib/libonnxruntime.so.1
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURRENT_DIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURRENT_DIR)/backend-assets/lib \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o silero-vad ./
package:
bash package.sh
build: silero-vad package

View file

@ -0,0 +1,53 @@
#!/bin/bash
# Script to copy the appropriate libraries based on architecture
# This script is used in the final stage of the Dockerfile
set -e
CURDIR=$(dirname "$(realpath $0)")
# Create lib directory
mkdir -p $CURDIR/package/lib
cp -avrf $CURDIR/silero-vad $CURDIR/package/
cp -avrf $CURDIR/run.sh $CURDIR/package/
cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
# Detect architecture and copy appropriate libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
# x86_64 architecture
echo "Detected x86_64 architecture, copying x86_64 libraries..."
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
# ARM64 architecture
echo "Detected ARM64 architecture, copying ARM64 libraries..."
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
else
echo "Error: Could not detect architecture"
exit 1
fi
echo "Packaging completed successfully"
ls -liah $CURDIR/package/
ls -liah $CURDIR/package/lib/

14
backend/go/silero-vad/run.sh Executable file
View file

@ -0,0 +1,14 @@
#!/bin/bash
set -ex
CURDIR=$(dirname "$(realpath $0)")
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
# If there is a lib/ld.so, use it
if [ -f $CURDIR/lib/ld.so ]; then
echo "Using lib/ld.so"
exec $CURDIR/lib/ld.so $CURDIR/silero-vad "$@"
fi
exec $CURDIR/silero-vad "$@"

View file

@ -68,7 +68,7 @@
default: "cpu-stablediffusion-ggml"
nvidia: "cuda12-stablediffusion-ggml"
intel: "intel-sycl-f16-stablediffusion-ggml"
#amd: "rocm-stablediffusion-ggml"
# amd: "rocm-stablediffusion-ggml"
vulkan: "vulkan-stablediffusion-ggml"
nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
# metal: "metal-stablediffusion-ggml"
@ -285,6 +285,54 @@
tags:
- text-to-speech
- TTS
- &silero-vad
name: "silero-vad"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-silero-vad"
icon: https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png
urls:
- https://github.com/snakers4/silero-vad
description: |
Silero VAD: pre-trained enterprise-grade Voice Activity Detector.
Silero VAD is a voice activity detection model that detects whether a given audio segment contains speech.
tags:
- voice-activity-detection
- VAD
- silero-vad
- CPU
- &local-store
name: "local-store"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-local-store"
urls:
- https://github.com/mudler/LocalAI
description: |
Local Store is a local-first, self-hosted, and open-source vector database.
tags:
- vector-database
- local-first
- open-source
- CPU
license: MIT
- &huggingface
name: "huggingface"
uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface"
icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
urls:
- https://huggingface.co/docs/hub/en/api
description: |
HuggingFace is a backend that uses the Hugging Face API to run models.
tags:
- LLM
- huggingface
license: MIT
- !!merge <<: *huggingface
name: "huggingface-development"
uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
- !!merge <<: *local-store
name: "local-store-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
- !!merge <<: *silero-vad
name: "silero-vad-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
- !!merge <<: *piper
name: "piper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-piper"

View file

@ -9,9 +9,7 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/assets"
"github.com/mudler/LocalAI/pkg/library"
"github.com/mudler/LocalAI/pkg/model"
pkgStartup "github.com/mudler/LocalAI/pkg/startup"
"github.com/mudler/LocalAI/pkg/xsysinfo"
@ -103,23 +101,6 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
if options.AssetsDestination != "" {
// Extract files from the embedded FS
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
if err != nil {
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
}
}
if options.LibPath != "" {
// If there is a lib directory, set LD_LIBRARY_PATH to include it
err := library.LoadExternal(options.LibPath)
if err != nil {
log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
}
}
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()

View file

@ -20,7 +20,6 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...
defOpts := []model.Option{
model.WithBackendString(c.Backend),
model.WithModel(c.Model),
model.WithAssetDir(so.AssetsDestination),
model.WithContext(so.Context),
model.WithModelID(name),
}

View file

@ -7,14 +7,12 @@ import (
"github.com/mudler/LocalAI/pkg/model"
)
func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
if storeName == "" {
storeName = "default"
func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string, backend string) (grpc.Backend, error) {
if backend == "" {
backend = model.LocalStoreBackend
}
sc := []model.Option{
model.WithBackendString(model.LocalStoreBackend),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithBackendString(backend),
model.WithModel(storeName),
}

View file

@ -1,13 +1,6 @@
package cliContext
import (
rice "github.com/GeertJohan/go.rice"
)
type Context struct {
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
BackendAssets *rice.Box `kong:"-"`
}

View file

@ -23,7 +23,6 @@ type RunCMD struct {
ExternalBackends []string `env:"LOCALAI_EXTERNAL_BACKENDS,EXTERNAL_BACKENDS" help:"A list of external backends to load from gallery on boot" group:"backends"`
BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
@ -46,7 +45,6 @@ type RunCMD struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
@ -99,10 +97,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithCsrf(r.CSRF),
config.WithLibPath(r.LibraryPath),
config.WithThreads(r.Threads),
config.WithBackendAssets(ctx.BackendAssets),
config.WithBackendAssetsOutput(r.BackendAssetsPath),
config.WithUploadLimitMB(r.UploadLimit),
config.WithApiKeys(r.APIKeys),
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),

View file

@ -27,7 +27,6 @@ type SoundGenerationCMD struct {
DoSample bool `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
}
@ -51,11 +50,10 @@ func parseToInt32Ptr(input string) *int32 {
func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
outputFile := t.OutputFile
outputDir := t.BackendAssetsPath
outputDir := os.TempDir()
if outputFile != "" {
outputDir = filepath.Dir(outputFile)
}
text := strings.Join(t.Text, " ")
externalBackends := make(map[string]string)
@ -71,7 +69,6 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
ModelPath: t.ModelsPath,
Context: context.Background(),
GeneratedContentDir: outputDir,
AssetsDestination: t.BackendAssetsPath,
ExternalGRPCBackends: externalBackends,
}
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)

View file

@ -15,20 +15,18 @@ import (
type TranscriptCMD struct {
Filename string `arg:""`
Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
Model string `short:"m" required:"" help:"Model name to run the TTS"`
Language string `short:"l" help:"Language of the audio file"`
Translate bool `short:"c" help:"Translate the transcription to english"`
Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
Model string `short:"m" required:"" help:"Model name to run the TTS"`
Language string `short:"l" help:"Language of the audio file"`
Translate bool `short:"c" help:"Translate the transcription to english"`
Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}
func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
opts := &config.ApplicationConfig{
ModelPath: t.ModelsPath,
Context: context.Background(),
AssetsDestination: t.BackendAssetsPath,
ModelPath: t.ModelsPath,
Context: context.Background(),
}
cl := config.NewBackendConfigLoader(t.ModelsPath)
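With the backend-assets path gone, the transcription CLI only needs the models path and the flags above; an illustrative invocation (subcommand and long-flag spellings are inferred from the struct tags, the model name is a placeholder):
```bash
# Transcribe an audio file with the default whisper backend
local-ai transcript -m whisper-base -l en -t 4 audio.wav
```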

View file

@ -17,18 +17,17 @@ import (
type TTSCMD struct {
Text []string `arg:""`
Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"`
Model string `short:"m" required:"" help:"Model name to run the TTS"`
Voice string `short:"v" help:"Voice name to run the TTS"`
Language string `short:"l" help:"Language to use with the TTS"`
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"`
Model string `short:"m" required:"" help:"Model name to run the TTS"`
Voice string `short:"v" help:"Voice name to run the TTS"`
Language string `short:"l" help:"Language to use with the TTS"`
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}
func (t *TTSCMD) Run(ctx *cliContext.Context) error {
outputFile := t.OutputFile
outputDir := t.BackendAssetsPath
outputDir := os.TempDir()
if outputFile != "" {
outputDir = filepath.Dir(outputFile)
}
@ -39,7 +38,6 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
ModelPath: t.ModelsPath,
Context: context.Background(),
GeneratedContentDir: outputDir,
AssetsDestination: t.BackendAssetsPath,
}
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
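Similarly, TTS output now defaults to a temporary directory unless `-o` is given; a hedged example (model name and output path are placeholders):
```bash
# Synthesize speech with the default piper backend and write it to hello.wav
local-ai tts -m en-us-voice.onnx -o hello.wav "Hello from LocalAI"
```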

View file

@ -1,7 +1,7 @@
package worker
type WorkerFlags struct {
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
}
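These flags now point the worker at the installed backends directory instead of extracted embedded assets. A sketch of starting the RPC worker, following the usage string in the worker's `Run` function further below (the backends path and RPC server arguments are illustrative):
```bash
# Start a llama.cpp RPC worker using backends installed under the given path
local-ai worker llama-cpp-rpc --backends-path /usr/share/local-ai/backends -- --host 0.0.0.0 --port 50052
```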

View file

@ -9,8 +9,6 @@ import (
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/assets"
"github.com/mudler/LocalAI/pkg/library"
"github.com/rs/zerolog/log"
)
@ -47,24 +45,17 @@ func findLLamaCPPBackend(backendSystemPath string) (string, error) {
}
func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
// Extract files from the embedded FS
err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
if err != nil {
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
}
if len(os.Args) < 4 {
return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- <llama-rpc-server-args>")
}
grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
grpcProcess, err := findLLamaCPPBackend(r.BackendsPath)
if err != nil {
return err
}
args := strings.Split(r.ExtraLLamaCPPArgs, " ")
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
args = append([]string{grpcProcess}, args...)
return syscall.Exec(

View file

@ -1,16 +0,0 @@
//go:build !p2p
// +build !p2p
package worker
import (
"fmt"
cliContext "github.com/mudler/LocalAI/core/cli/context"
)
type P2P struct{}
func (r *P2P) Run(ctx *cliContext.Context) error {
return fmt.Errorf("p2p mode is not enabled in this build")
}

View file

@ -1,6 +1,3 @@
//go:build p2p
// +build p2p
package worker
import (
@ -13,8 +10,6 @@ import (
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/pkg/assets"
"github.com/mudler/LocalAI/pkg/library"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"
)
@ -29,12 +24,6 @@ type P2P struct {
}
func (r *P2P) Run(ctx *cliContext.Context) error {
// Extract files from the embedded FS
err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
if err != nil {
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
}
// Check if the token is set
// as we always need it.
@ -71,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
for {
log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
grpcProcess, err := findLLamaCPPBackend(r.BackendsPath)
if err != nil {
log.Error().Err(err).Msg("Failed to find llama-cpp-rpc-server")
return
@ -85,8 +74,6 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args))
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
cmd := exec.Command(
grpcProcess, args...,
)

View file

@ -6,7 +6,6 @@ import (
"regexp"
"time"
rice "github.com/GeertJohan/go.rice"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"
)
@ -17,7 +16,6 @@ type ApplicationConfig struct {
ModelPath string
BackendsPath string
ExternalBackends []string
LibPath string
UploadLimitMB, Threads, ContextSize int
F16 bool
Debug bool
@ -50,9 +48,6 @@ type ApplicationConfig struct {
Galleries []Gallery
BackendGalleries []Gallery
BackendAssets *rice.Box
AssetsDestination string
ExternalGRPCBackends map[string]string
AutoloadGalleries, AutoloadBackendGalleries bool
@ -140,12 +135,6 @@ func WithP2PToken(s string) AppOption {
}
}
func WithLibPath(path string) AppOption {
return func(o *ApplicationConfig) {
o.LibPath = path
}
}
var EnableWatchDog = func(o *ApplicationConfig) {
o.WatchDog = true
}
@ -211,18 +200,6 @@ func WithCorsAllowOrigins(b string) AppOption {
}
}
func WithBackendAssetsOutput(out string) AppOption {
return func(o *ApplicationConfig) {
o.AssetsDestination = out
}
}
func WithBackendAssets(f *rice.Box) AppOption {
return func(o *ApplicationConfig) {
o.BackendAssets = f
}
}
func WithStringGalleries(galls string) AppOption {
return func(o *ApplicationConfig) {
if galls == "" {

View file

@ -126,8 +126,9 @@ func InstallModelFromGallery(
if err != nil {
return err
}
log.Debug().Msgf("Installed model %q", installedModel.Name)
if automaticallyInstallBackend && installedModel.Backend != "" {
log.Debug().Msgf("Installing backend %q", installedModel.Backend)
systemState, err := system.GetSystemState()
if err != nil {
return err

View file

@ -23,7 +23,6 @@ import (
. "github.com/onsi/gomega"
"gopkg.in/yaml.v3"
rice "github.com/GeertJohan/go.rice"
openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
@ -264,16 +263,6 @@ func getRequest(url string, header http.Header) (error, int, []byte) {
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
var backendAssets *rice.Box
func init() {
var err error
backendAssets, err = rice.FindBox("backend-assets")
if err != nil {
panic(err)
}
}
var _ = Describe("API test", func() {
var app *fiber.App
@ -300,9 +289,6 @@ var _ = Describe("API test", func() {
modelDir = filepath.Join(tmpdir, "models")
err = os.Mkdir(modelDir, 0750)
Expect(err).ToNot(HaveOccurred())
backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
err = os.Mkdir(backendAssetsDir, 0750)
Expect(err).ToNot(HaveOccurred())
c, cancel = context.WithCancel(context.Background())
@ -341,8 +327,7 @@ var _ = Describe("API test", func() {
config.WithModelPath(modelDir),
config.WithBackendsPath(backendPath),
config.WithApiKeys([]string{apiKey}),
config.WithBackendAssets(backendAssets),
config.WithBackendAssetsOutput(backendAssetsDir))...)
)...)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
@ -545,8 +530,7 @@ var _ = Describe("API test", func() {
config.WithBackendsPath(backendPath),
config.WithGalleries(galleries),
config.WithModelPath(modelDir),
config.WithBackendAssets(backendAssets),
config.WithBackendAssetsOutput(tmpdir))...,
)...,
)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
@ -803,6 +787,10 @@ var _ = Describe("API test", func() {
})
It("shows the external backend", func() {
// Only run on linux
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
// do an http request to the /system endpoint
resp, err := http.Get("http://127.0.0.1:9090/system")
Expect(err).ToNot(HaveOccurred())
@ -888,6 +876,13 @@ var _ = Describe("API test", func() {
// See tests/integration/stores_test
Context("Stores", Label("stores"), func() {
BeforeEach(func() {
// Only run on linux
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
})
It("sets, gets, finds and deletes entries", func() {
ks := [][]float32{
{0.1, 0.2, 0.3},

View file

@ -17,7 +17,7 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
return err
}
sb, err := backend.StoreBackend(sl, appConfig, input.Store)
sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@ -45,7 +45,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
return err
}
sb, err := backend.StoreBackend(sl, appConfig, input.Store)
sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@ -67,7 +67,7 @@ func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
return err
}
sb, err := backend.StoreBackend(sl, appConfig, input.Store)
sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@ -99,7 +99,7 @@ func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConf
return err
}
sb, err := backend.StoreBackend(sl, appConfig, input.Store)
sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}

View file

@ -13,10 +13,7 @@ import (
// @Router /system [get]
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
if err != nil {
return err
}
availableBackends := []string{}
loadedModels := ml.ListModels()
for b := range appConfig.ExternalGRPCBackends {
availableBackends = append(availableBackends, b)
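After this change `/system` reports only the externally registered gRPC backends rather than scanning an extracted assets directory; the endpoint can be inspected directly (default bind address assumed):
```bash
curl http://localhost:8080/system
```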

View file

@ -5,7 +5,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@ -37,7 +36,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
"Models": modelsWithoutConfig,
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,

View file

@ -6,7 +6,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
@ -80,10 +79,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
if p2p.IsP2PEnabled() {
router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
}
router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
router.Get("/version", func(c *fiber.Ctx) error {
return c.JSON(struct {

View file

@ -25,38 +25,39 @@ func RegisterUIRoutes(app *fiber.App,
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
if p2p.IsP2PEnabled() {
app.Get("/p2p", func(c *fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI - P2P dashboard",
"BaseURL": utils.BaseURL(c),
"Version": internal.PrintableVersion(),
//"Nodes": p2p.GetAvailableNodes(""),
//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"P2PToken": appConfig.P2PToken,
"NetworkID": appConfig.P2PNetworkID,
}
// P2P
app.Get("/p2p", func(c *fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI - P2P dashboard",
"BaseURL": utils.BaseURL(c),
"Version": internal.PrintableVersion(),
//"Nodes": p2p.GetAvailableNodes(""),
//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
// Render index
return c.Render("views/p2p", summary)
})
"P2PToken": appConfig.P2PToken,
"NetworkID": appConfig.P2PNetworkID,
}
/* show nodes live! */
app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
})
app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
})
// Render index
return c.Render("views/p2p", summary)
})
app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
})
app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
})
}
/* show nodes live! */
app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
})
app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
})
app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
})
app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
})
// End P2P
if !appConfig.DisableGalleryEndpoint {
registerGalleryRoutes(app, cl, appConfig, galleryService, processingOps)
@ -76,8 +77,8 @@ func RegisterUIRoutes(app *fiber.App,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0],
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
}
// Render index
@ -121,7 +122,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": modelThatCanBeUsed,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@ -151,7 +151,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@ -169,7 +168,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@ -203,7 +201,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": modelThatCanBeUsed,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@ -221,7 +218,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@ -253,7 +249,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": modelThatCanBeUsed,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}

View file

@ -15,7 +15,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/rs/zerolog/log"
@ -71,7 +70,6 @@ func registerBackendGalleryRoutes(app *fiber.App, appConfig *config.ApplicationC
"ProcessingBackends": processingBackendsData,
"AvailableBackends": len(backends),
"TaskTypes": taskTypes,
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
if page == "" {

View file

@ -15,7 +15,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/rs/zerolog/log"
@ -70,9 +69,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
"AllTags": tags,
"ProcessingModels": processingModelsData,
"AvailableModels": len(models),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"TaskTypes": taskTypes,
"TaskTypes": taskTypes,
// "ApplicationConfig": appConfig,
}

View file

@ -268,7 +268,7 @@
Command to connect (click to copy):
</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words" @click="copyToken($el.textContent)" >
docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID=<span class="token" x-text="cluster.NetworkID"></span> -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="<span class="token" x-text="network.token"></span>" --net host -ti localai/localai:master-ffmpeg-core federated --debug
docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID=<span class="token" x-text="cluster.NetworkID"></span> -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="<span class="token" x-text="network.token"></span>" --net host -ti localai/localai:master federated --debug
</code>
or via CLI:
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words" @click="copyToken($el.textContent)" >

View file

@ -49,11 +49,11 @@
</div>
<!-- Warning box if p2p token is empty and p2p is enabled -->
{{ if and .IsP2PEnabled (eq .P2PToken "") }}
{{ if eq .P2PToken "" }}
<div class="bg-gradient-to-r from-red-800/70 to-red-700/70 border border-red-600/50 p-6 rounded-xl shadow-lg mb-10 text-left">
<div class="flex items-center mb-2">
<i class="fa-solid fa-exclamation-triangle text-red-300 text-2xl mr-3"></i>
<h3 class="text-xl font-bold text-white">Warning: P2P mode is disabled or no token was specified</h3>
<h3 class="text-xl font-bold text-white">Warning: P2P token was not specified</h3>
</div>
<p class="mb-4 text-red-200">
You have to enable P2P mode by starting LocalAI with <code class="bg-red-900/50 px-2 py-0.5 rounded">--p2p</code>. Please restart the server with <code class="bg-red-900/50 px-2 py-0.5 rounded">--p2p</code> to generate a new token automatically that can be used to discover other nodes. If you already have a token, specify it with <code class="bg-red-900/50 px-2 py-0.5 rounded">export TOKEN=".."</code>

View file

@ -40,11 +40,9 @@
<a href="talk/" class="text-gray-300 hover:text-white px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-blue-900/30 flex items-center">
<i class="fa-solid fa-phone text-blue-400 mr-2"></i>Talk
</a>
{{ if .IsP2PEnabled }}
<a href="p2p/" class="text-gray-300 hover:text-white px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-blue-900/30 flex items-center">
<i class="fa-solid fa-circle-nodes text-blue-400 mr-2"></i>Swarm
</a>
{{ end }}
<a href="swagger/" class="text-gray-300 hover:text-white px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-blue-900/30 flex items-center">
<i class="fas fa-code text-blue-400 mr-2"></i>API
</a>
@ -75,11 +73,9 @@
<a href="talk/" class="block text-gray-300 hover:text-white hover:bg-blue-900/30 px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
<i class="fa-solid fa-phone text-blue-400 mr-3 w-5 text-center"></i>Talk
</a>
{{ if .IsP2PEnabled }}
<a href="p2p/" class="block text-gray-300 hover:text-white hover:bg-blue-900/30 px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
<i class="fa-solid fa-circle-nodes text-blue-400 mr-3 w-5 text-center"></i>Swarm
</a>
{{ end }}
<a href="swagger/" class="block text-gray-300 hover:text-white hover:bg-blue-900/30 px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
<i class="fas fa-code text-blue-400 mr-3 w-5 text-center"></i>API
</a>

View file

@ -1,6 +1,3 @@
//go:build p2p
// +build p2p
package p2p
import (

View file

@ -1,6 +1,3 @@
//go:build p2p
// +build p2p
package p2p
import (
@ -65,10 +62,6 @@ func GenerateToken(DHTInterval, OTPInterval int) string {
return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
}
func IsP2PEnabled() bool {
return true
}
func nodeID(s string) string {
hostname, _ := os.Hostname()
return fmt.Sprintf("%s-%s", hostname, s)

View file

@ -1,35 +0,0 @@
//go:build !p2p
// +build !p2p
package p2p
import (
"context"
"fmt"
"github.com/mudler/edgevpn/pkg/node"
)
func GenerateToken(DHTInterval, OTPInterval int) string {
return "not implemented"
}
func (f *FederatedServer) Start(ctx context.Context) error {
return fmt.Errorf("not implemented")
}
func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData), allocate bool) error {
return fmt.Errorf("not implemented")
}
func ExposeService(ctx context.Context, host, port, token, servicesID string) (*node.Node, error) {
return nil, fmt.Errorf("not implemented")
}
func IsP2PEnabled() bool {
return false
}
func NewNode(token string) (*node.Node, error) {
return nil, fmt.Errorf("not implemented")
}

View file

@ -63,23 +63,29 @@ type VADResponse struct {
Segments []VADSegment `json:"segments" yaml:"segments"`
}
type StoreCommon struct {
Backend string `json:"backend,omitempty" yaml:"backend,omitempty"`
}
type StoresSet struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys" yaml:"keys"`
Values []string `json:"values" yaml:"values"`
StoreCommon
}
type StoresDelete struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys"`
StoreCommon
}
type StoresGet struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys" yaml:"keys"`
StoreCommon
}
type StoresGetResponse struct {
@ -92,6 +98,7 @@ type StoresFind struct {
Key []float32 `json:"key" yaml:"key"`
Topk int `json:"topk" yaml:"topk"`
StoreCommon
}
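The embedded `StoreCommon` adds an optional `backend` field to every stores request, which selects the store backend instead of always using the built-in local store. A hedged example request (the `/stores/set` route and anything beyond the fields shown above are assumptions):
```bash
curl http://localhost:8080/stores/set \
  -H "Content-Type: application/json" \
  -d '{
        "store": "default",
        "backend": "local-store",
        "keys": [[0.1, 0.2, 0.3]],
        "values": ["first entry"]
      }'
```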
type StoresFindResponse struct {

View file

@ -5,7 +5,7 @@ services:
# Available images with CUDA, ROCm, SYCL
# Image list (quay.io): https://quay.io/repository/go-skynet/local-ai?tab=tags
# Image list (dockerhub): https://hub.docker.com/r/localai/localai
image: quay.io/go-skynet/local-ai:master-ffmpeg-core
image: quay.io/go-skynet/local-ai:master
build:
context: .
dockerfile: Dockerfile

View file

@ -579,38 +579,14 @@ You can use 'Extra-Usage' request header key presence ('Extra-Usage: true') to r
### Extra backends
LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) are split into core and extra variants. By default, images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead ship only the strictly necessary dependencies to run LocalAI, with only a core set of backends.
If you wish to build a custom container image with extra backends, you can use the core images and build only the backends you are interested in, or prepare the environment on startup by using the `EXTRA_BACKENDS` environment variable. For instance, to use the diffusers backend:
```Dockerfile
FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
RUN make -C backend/python/diffusers
```
Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or the `--external-grpc-backends` CLI flag) to point to the backends you are using (`EXTERNAL_GRPC_BACKENDS="backend_name:/path/to/backend"`), for example with diffusers:
```Dockerfile
FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
RUN make -C backend/python/diffusers
ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/run.sh"
```
{{% alert note %}}
You can specify remote external backends or paths to local files. The syntax is `backend-name:/path/to/backend` or `backend-name:host:port`.
{{% /alert %}}
LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. See the [backend section](https://localai.io/backends/) for more details on how to install and build new backends for LocalAI.
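For reference, an out-of-tree gRPC backend can still be registered through `EXTERNAL_GRPC_BACKENDS` using the `name:/path/to/backend` or `name:host:port` syntax; for instance (backend name and address are placeholders):
```bash
docker run -p 8080:8080 \
  -e EXTERNAL_GRPC_BACKENDS="my-backend:host.docker.internal:50051" \
  quay.io/go-skynet/local-ai:master
```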
#### In runtime
When using the `-core` container image it is possible to prepare the Python backends you are interested in by using the `EXTRA_BACKENDS` variable, for instance:
```bash
docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master-ffmpeg-core
docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master
```
### Concurrent requests

View file

@ -73,8 +73,6 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
- CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
- CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ...
- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ...
In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:
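An illustrative command (image tag, paths and model are placeholders):
```bash
docker run --gpus all -p 8080:8080 -v $PWD/models:/models \
  quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda12
```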
@ -259,7 +257,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/
### Container images
To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ...
To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16`, ...
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
@ -268,7 +266,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
```bash
docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32 phi-2
```
### Notes
@ -276,7 +274,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -
In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
```bash
docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core
docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16
```
Note also that SYCL has a known issue where it hangs with `mmap: true`. You have to disable it in the model configuration if it is explicitly enabled.

View file

@ -44,7 +44,6 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
{{% alert note %}}
- The `sentencetransformers` backend is an optional backend of LocalAI and uses Python. If you are running `LocalAI` from the containers, you are good to go: everything is already configured for use.
- If you are running `LocalAI` manually, you must install the Python dependencies (`make prepare-extra-conda-environments`). This requires `conda` to be installed.
- For local execution, you also have to specify the extra backend in the `EXTERNAL_GRPC_BACKENDS` environment variable.
- Example: `EXTERNAL_GRPC_BACKENDS="sentencetransformers:/path/to/LocalAI/backend/python/sentencetransformers/sentencetransformers.py"`
- The `sentencetransformers` backend supports only embeddings of text, not of tokens. If you need to embed tokens, you can use the `bert` backend or `llama.cpp`.

View file

@ -18,8 +18,6 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA
- Images ending with `-core` are smaller images without pre-downloaded Python dependencies. Use these images if you plan to use the `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
- Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated default configuration.
- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is required to use LocalAI's `audio-to-text` features.
- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).
{{% /alert %}}

View file

@ -23,7 +23,7 @@ MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branc
Here's an example to initiate the **phi-2** model:
```bash
docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
```
You can also check all the embedded model configurations [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
@ -64,7 +64,7 @@ Then, launch LocalAI using your gist's URL:
```bash
## Important! Substitute with your gist's URL!
docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/xxxx/phi-2.yaml
docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/xxxx/phi-2.yaml
```
## Next Steps

View file

@ -2,6 +2,7 @@
name: "alpaca"
config_file: |
backend: "llama-cpp"
context_size: 4096
f16: true
mmap: true

View file

@ -2,6 +2,7 @@
name: "chatml"
config_file: |
backend: "llama-cpp"
mmap: true
function:
disable_no_action: true

View file

@ -2,6 +2,7 @@
name: "chatml-hercules"
config_file: |
backend: "llama-cpp"
mmap: true
function:
# disable injecting the "answer" tool

View file

@ -2,6 +2,7 @@
name: "chatml"
config_file: |
backend: "llama-cpp"
mmap: true
template:
chat_message: |

View file

@ -2,6 +2,7 @@
name: "command-r"
config_file: |
backend: "llama-cpp"
context_size: 131072
stopwords:
- "<|END_OF_TURN_TOKEN|>"

View file

@ -2,6 +2,7 @@
name: "deephermes"
config_file: |
backend: "llama-cpp"
mmap: true
context_size: 8192
stopwords:

View file

@ -2,6 +2,7 @@
name: "deepseek-r1"
config_file: |
backend: "llama-cpp"
context_size: 131072
mmap: true
f16: true

View file

@ -2,6 +2,7 @@
name: "deepseek"
config_file: |
backend: "llama-cpp"
mmap: true
context_size: 8192
template:

View file

@ -2,6 +2,7 @@
name: "falcon3"
config_file: |
backend: "llama-cpp"
mmap: true
template:
chat_message: |

View file

@ -2,6 +2,7 @@
name: "gemma"
config_file: |
backend: "llama-cpp"
mmap: true
context_size: 8192
template:

View file

@ -2,6 +2,7 @@
name: "granite"
config_file: |
backend: "llama-cpp"
mmap: true
template:
chat_message: |

View file

@ -2,6 +2,7 @@
name: "granite-3.2"
config_file: |
backend: "llama-cpp"
mmap: true
template:
chat_message: |

View file

@ -2,6 +2,7 @@
name: "hermes-2-pro-mistral"
config_file: |
backend: "llama-cpp"
mmap: true
context_size: 8192
stopwords:

View file

@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
backend: "llama-cpp"
mmap: true
template:
chat_message: |

View file

@ -2,6 +2,7 @@
name: "llama3-instruct-grammar"
config_file: |
backend: "llama-cpp"
mmap: true
function:
disable_no_action: true

View file

@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
backend: "llama-cpp"
mmap: true
function:
disable_no_action: true

View file

@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
backend: "llama-cpp"
mmap: true
cutstrings:
- (.*?)</thinking>

View file

@ -2,6 +2,7 @@
name: "llama3.2-fcall"
config_file: |
backend: "llama-cpp"
mmap: true
function:
json_regex_match:

View file

@ -2,6 +2,7 @@
name: "llama3.2-quantized"
config_file: |
backend: "llama-cpp"
mmap: true
function:
disable_no_action: true

View file

@ -2,6 +2,7 @@
name: "mathstral"
config_file: |
backend: "llama-cpp"
context_size: 8192
mmap: true
stopwords:

View file

@ -2,6 +2,7 @@
name: "mistral-0.3"
config_file: |
backend: "llama-cpp"
context_size: 8192
mmap: true
stopwords:

View file

@ -3,6 +3,7 @@ name: "moondream2"
config_file: |
backend: "llama-cpp"
context_size: 2046
roles:
user: "\nQuestion: "

View file

@ -2,6 +2,7 @@
name: localai
config_file: |-
backend: "llama-cpp"
context_size: 8192
stopwords:
- <|im_end|>

View file

@ -2,6 +2,7 @@
name: "phi-2-chatml"
config_file: |
backend: "llama-cpp"
mmap: true
template:
chat_message: |

View file

@ -2,6 +2,7 @@
name: "phi-2-orange"
config_file: |
backend: "llama-cpp"
mmap: true
template:
chat_message: |

View file

@ -2,6 +2,7 @@
name: "phi-3-chat"
config_file: |
backend: "llama-cpp"
mmap: true
template:
chat_message: |

View file

@ -2,6 +2,7 @@
name: "phi-4-chat"
config_file: |
backend: "llama-cpp"
mmap: true
function:
json_regex_match:

Some files were not shown because too many files have changed in this diff.