feat: refactor build process, drop embedded backends (#5875)

* feat: split remaining backends and drop embedded backends - Drop silero-vad, huggingface, and stores backend from embedded binaries - Refactor Makefile and Dockerfile to avoid building grpc backends - Drop golang code that was used to embed backends - Simplify building by using goreleaser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(gallery): be specific with llama-cpp backend templates Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(docs): update Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(ci): minor fixes Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore: drop all ffmpeg references Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix: run protogen-go Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Always enable p2p mode Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Update gorelease file Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(stores): do not always load Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix linting issues Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Mac OS fixup Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-24 09:28:23 +00:00 · 2025-07-22 16:31:04 +02:00 · 2025-07-22 16:31:04 +02:00 · 98e5291afc
commit 98e5291afc
parent e29b2c3aff
118 changed files with 631 additions and 1339 deletions
--- a/.devcontainer-scripts/poststart.sh
+++ b/.devcontainer-scripts/poststart.sh
@ -2,9 +2,6 @@

 cd /workspace

-# Grab the pre-stashed backend assets to avoid build issues
-cp -r /build/backend-assets /workspace/backend-assets
-
 # Ensures generated source files are present upon load
 make prepare

--- a/.devcontainer/docker-compose-devcontainer.yml
+++ b/.devcontainer/docker-compose-devcontainer.yml
@ -4,9 +4,6 @@ services:
      context: ..
      dockerfile: Dockerfile
      target: devcontainer
-      args:
-      - FFMPEG=true
-      - GO_TAGS=p2p tts
    env_file:
      - ../.env
    ports:
--- a/.env
+++ b/.env
@ -41,13 +41,6 @@
 ## Uncomment and set to true to enable rebuilding from source
 # REBUILD=true

-## Enable go tags, available: p2p, tts
-## p2p: enable distributed inferencing
-## tts: enables text-to-speech with go-piper 
-## (requires REBUILD=true)
-#
-# GO_TAGS=p2p
-
 ## Path where to store generated images
 # LOCALAI_IMAGE_PATH=/tmp/generated/images

--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@ -43,7 +43,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -55,7 +55,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -67,7 +67,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -79,7 +79,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -91,7 +91,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -104,7 +104,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -116,7 +116,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -128,7 +128,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -140,7 +140,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-bark'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -152,7 +152,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -165,7 +165,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -177,7 +177,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -189,7 +189,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -201,7 +201,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -213,7 +213,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -226,7 +226,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -238,7 +238,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -250,7 +250,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -262,7 +262,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-bark'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -274,7 +274,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -287,7 +287,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -299,7 +299,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -311,7 +311,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -323,7 +323,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -335,7 +335,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -348,7 +348,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -360,7 +360,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -372,7 +372,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -384,7 +384,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-bark'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
@ -397,7 +397,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -409,7 +409,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -421,7 +421,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -433,7 +433,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -445,7 +445,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -457,7 +457,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -469,7 +469,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -481,7 +481,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -493,7 +493,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -506,7 +506,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -518,7 +518,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -530,7 +530,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -542,7 +542,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -554,7 +554,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -566,7 +566,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -578,7 +578,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-bark'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -590,7 +590,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-bark'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -603,7 +603,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64,linux/arm64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-piper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -616,7 +616,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-bark-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -628,7 +628,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64,linux/arm64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-cpu-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -652,7 +652,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-vulkan-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -665,7 +665,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-cpu-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -677,7 +677,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -689,7 +689,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -701,7 +701,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -713,7 +713,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -725,7 +725,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -749,8 +749,8 @@ jobs:
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
            tag-suffix: '-cpu-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -762,7 +762,7 @@ jobs:
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -774,7 +774,7 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -786,7 +786,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -798,7 +798,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@ -810,7 +810,7 @@ jobs:
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
-            tag-latest: 'true'
+            tag-latest: 'auto'
            tag-suffix: '-gpu-vulkan-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
@ -842,6 +842,45 @@ jobs:
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
+          # silero-vad
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-cpu-silero-vad'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "silero-vad"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          # local-store
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-cpu-local-store'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "local-store"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          # huggingface
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-huggingface'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "huggingface"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
  llama-cpp-darwin:
    runs-on: macOS-14
    strategy:
@ -866,7 +905,7 @@ jobs:
      - name: Build llama-cpp-darwin
        run: |
          make protogen-go
-          make build-api
+          make build
          bash scripts/build-llama-cpp-darwin.sh
          ls -la build/darwin.tar
          mv build/darwin.tar build/llama-cpp.tar
@ -954,7 +993,7 @@ jobs:
      - name: Build llama-cpp-darwin
        run: |
          make protogen-go
-          make build-api
+          make build
          export PLATFORMARCH=darwin/amd64
          bash scripts/build-llama-cpp-darwin.sh
          ls -la build/darwin.tar
--- a/.github/workflows/build-test.yaml
+++ b/.github/workflows/build-test.yaml
@ -0,0 +1,23 @@
+name: Build test
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+jobs:
+  build-test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # fetch full history, including tags
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.23'  # quoted so YAML keeps it a string, not a float
+      - name: Run GoReleaser
+        run: |
+          make dev-dist
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@ -31,7 +31,7 @@ jobs:
          make protogen-go
      - name: Build api
        run: |
-          CGO_ENABLED=0 make build-api
+          CGO_ENABLED=0 make build
      - name: rm
        uses: appleboy/ssh-action@v1.2.2
        with:
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@ -14,7 +14,6 @@ jobs:
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -40,8 +39,7 @@ jobs:
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
-            ffmpeg: 'true'
+            tag-suffix: '-gpu-nvidia-cuda12'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=3 --output-sync=target"
@ -49,7 +47,6 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-hipblas'
-            ffmpeg: 'false'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
@ -59,15 +56,13 @@ jobs:
            tag-latest: 'false'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: 'sycl-f16-ffmpeg'
-            ffmpeg: 'true'
+            tag-suffix: 'sycl-f16'
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            tag-suffix: '-vulkan-ffmpeg-core'
-            ffmpeg: 'true'
+            tag-suffix: '-vulkan-core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@ -18,7 +18,6 @@ jobs:
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -40,7 +39,6 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-hipblas'
-            ffmpeg: 'true'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
@ -52,7 +50,6 @@ jobs:
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -76,7 +73,6 @@ jobs:
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: ''
-            ffmpeg: 'true'
            base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
            aio: "-aio-cpu"
@ -88,7 +84,6 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda11'
-            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
@ -100,7 +95,6 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda12'
-            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
@ -110,7 +104,6 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-vulkan'
-            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
@ -122,7 +115,6 @@ jobs:
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-gpu-intel-f16'
-            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
            aio: "-aio-gpu-intel-f16"
@ -132,7 +124,6 @@ jobs:
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-gpu-intel-f32'
-            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
            aio: "-aio-gpu-intel-f32"
@ -142,7 +133,6 @@ jobs:
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -167,7 +157,6 @@ jobs:
            platforms: 'linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-arm64'
-            ffmpeg: 'true'
            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
            runs-on: 'ubuntu-24.04-arm'
            makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@ -37,10 +37,6 @@ on:
        description: 'Tag suffix'
        default: ''
        type: string
-      ffmpeg:
-        description: 'FFMPEG'
-        default: ''
-        type: string
      skip-drivers:
        description: 'Skip drivers by default'
        default: 'false'
@ -236,7 +232,6 @@ jobs:
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
-            FFMPEG=${{ inputs.ffmpeg }}
            BASE_IMAGE=${{ inputs.base-image }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
@ -264,7 +259,6 @@ jobs:
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
-            FFMPEG=${{ inputs.ffmpeg }}
            BASE_IMAGE=${{ inputs.base-image }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
--- a/.github/workflows/notify-models.yaml
+++ b/.github/workflows/notify-models.yaml
@ -96,7 +96,8 @@ jobs:
    - name: Start LocalAI
      run: |
        echo "Starting LocalAI..."
-        docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
+        # Run detached: no TTY/stdin flags are needed, and a bare `-e` would consume the next argument as its value.
+        docker run -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
        until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready";  docker logs --tail 10 local-ai; sleep 2; done
      # Check the PR diff using the current branch and the base branch of the PR
    - uses: GrantBirki/git-diff-action@v2.8.1
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@ -1,399 +1,30 @@
-name: Build and Release
+name: goreleaser

 on:
  push:
-    branches:
-      - master
    tags:
      - 'v*'
-  pull_request:

-env:
-  GRPC_VERSION: v1.65.0
-
+# The release job publishes a GitHub release with GITHUB_TOKEN, which requires write access to repository contents.
 permissions:
  contents: write
-
-concurrency:
-  group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true

 jobs:
-
-  # TODO: temporary disable linux-arm64 build
-  # build-linux-arm:
-  #   runs-on: ubuntu-24.04-arm
-  #   steps:
-  #     - name: Free Disk Space (Ubuntu)
-  #       uses: jlumbroso/free-disk-space@main
-  #       with:
-  #         # this might remove tools that are actually needed,
-  #         # if set to "true" but frees about 6 GB
-  #         tool-cache: true
-  #         # all of these default to true, but feel free to set to
-  #         # "false" if necessary for your workflow
-  #         android: true
-  #         dotnet: true
-  #         haskell: true
-  #         large-packages: true
-  #         docker-images: true
-  #         swap-storage: true
-
-  #     - name: Release space from worker
-  #       run: |
-  #         echo "Listing top largest packages"
-  #         pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-  #         head -n 30 <<< "${pkgs}"
-  #         echo
-  #         df -h
-  #         echo
-  #         sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-  #         sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
-  #         sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo apt-get remove -y '^mono-.*' || true
-  #         sudo apt-get remove -y '^ghc-.*' || true
-  #         sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-  #         sudo apt-get remove -y 'php.*' || true
-  #         sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-  #         sudo apt-get remove -y '^google-.*' || true
-  #         sudo apt-get remove -y azure-cli || true
-  #         sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-  #         sudo apt-get remove -y '^gfortran-.*' || true
-  #         sudo apt-get remove -y microsoft-edge-stable || true
-  #         sudo apt-get remove -y firefox || true
-  #         sudo apt-get remove -y powershell || true
-  #         sudo apt-get remove -y r-base-core || true
-  #         sudo apt-get autoremove -y
-  #         sudo apt-get clean
-  #         echo
-  #         echo "Listing top largest packages"
-  #         pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-  #         head -n 30 <<< "${pkgs}"
-  #         echo
-  #         sudo rm -rfv build || true
-  #         sudo rm -rf /usr/share/dotnet || true
-  #         sudo rm -rf /opt/ghc || true
-  #         sudo rm -rf "/usr/local/share/boost" || true
-  #         sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-  #         df -h
-
-  #     - name: Force Install GIT latest
-  #       run: |
-  #         sudo apt-get update \
-  #         && sudo apt-get install -y software-properties-common \
-  #         && sudo apt-get update \
-  #         && sudo add-apt-repository -y ppa:git-core/ppa \
-  #         && sudo apt-get update \
-  #         && sudo apt-get install -y git
-  #     - name: Clone
-  #       uses: actions/checkout@v4
-  #       with:
-  #         submodules: true
-  #     - uses: actions/setup-go@v5
-  #       with:
-  #         go-version: '1.21.x'
-  #         cache: false
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
-  #         make install-go-tools
-  #     - name: Install CUDA Dependencies
-  #       run: |
-  #         curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
-  #         sudo dpkg -i cuda-keyring_1.1-1_all.deb
-  #         sudo apt-get update
-  #         sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
-  #       env:
-  #         CUDA_VERSION: 12-5
-  #     - name: Cache grpc
-  #       id: cache-grpc
-  #       uses: actions/cache@v4
-  #       with:
-  #         path: grpc
-  #         key: ${{ runner.os }}-grpc-arm64-${{ env.GRPC_VERSION }}
-  #     - name: Build grpc
-  #       if: steps.cache-grpc.outputs.cache-hit != 'true'
-  #       run: |
-  #         git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-  #         cd grpc && sed -i "216i\  TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
-  #         cd cmake/build && cmake -DgRPC_INSTALL=ON \
-  #           -DgRPC_BUILD_TESTS=OFF \
-  #           ../.. && sudo make --jobs 5 --output-sync=target
-  #     - name: Install gRPC
-  #       run: |
-  #         cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
-  #     # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
-  #     - name: Build
-  #       id: build
-  #       run: |
-  #         go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-  #         go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-  #         export PATH=$PATH:$GOPATH/bin
-  #         export PATH=/usr/local/cuda/bin:$PATH
-  #         sudo cp /lib64/ld-linux-aarch64.so.1 ld.so
-  #         BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/aarch64-linux-gnu/libdl.so.2 /usr/lib/aarch64-linux-gnu/librt.so.1 /usr/lib/aarch64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
-  #         make -j4 dist
-  #     - uses: actions/upload-artifact@v4
-  #       with:
-  #         name: LocalAI-linux-arm64
-  #         path: release/
-  #     - name: Release
-  #       uses: softprops/action-gh-release@v2
-  #       if: startsWith(github.ref, 'refs/tags/')
-  #       with:
-  #         files: |
-  #           release/*
-  #     - name: Setup tmate session if tests fail
-  #       if: ${{ failure() }}
-  #       uses: mxschmitt/action-tmate@v3.22
-  #       with:
-  #         detached: true
-  #         connect-timeout-seconds: 180
-  #         limit-access-to-actor: true
-  build-linux:
+  goreleaser:
    runs-on: ubuntu-latest
    steps:
-      - name: Free Disk Space (Ubuntu)
-        uses: jlumbroso/free-disk-space@main
-        with:
-          # this might remove tools that are actually needed,
-          # if set to "true" but frees about 6 GB
-          tool-cache: true
-          # all of these default to true, but feel free to set to
-          # "false" if necessary for your workflow
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          docker-images: true
-          swap-storage: true
-
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get remove -y microsoft-edge-stable || true
-          sudo apt-get remove -y firefox || true
-          sudo apt-get remove -y powershell || true
-          sudo apt-get remove -y r-base-core || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          sudo rm -rf /usr/share/dotnet || true
-          sudo rm -rf /opt/ghc || true
-          sudo rm -rf "/usr/local/share/boost" || true
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-          df -h
-
-      - name: Force Install GIT latest
-        run: |
-          sudo apt-get update \
-          && sudo apt-get install -y software-properties-common \
-          && sudo apt-get update \
-          && sudo add-apt-repository -y ppa:git-core/ppa \
-          && sudo apt-get update \
-          && sudo apt-get install -y git
-      - name: Clone
+      - name: Checkout
        uses: actions/checkout@v4
        with:
-          submodules: true
-      - uses: actions/setup-go@v5
+          fetch-depth: 0
+      - name: Set up Go
+        uses: actions/setup-go@v5
        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
-          make install-go-tools
-      - name: Intel Dependencies
-        run: |
-          wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
-          echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
-          sudo apt update
-          sudo apt install -y intel-basekit
-      - name: Install CUDA Dependencies
-        run: |
-          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-          sudo dpkg -i cuda-keyring_1.1-1_all.deb
-          sudo apt-get update
-          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+          go-version: '1.23'
+      - name: Run GoReleaser
+        uses: goreleaser/goreleaser-action@v6
+        with:
+          version: v2.11.0
+          args: release --clean
        env:
-          CUDA_VERSION: 12-5
-      - name: "Install Hipblas"
-        env:
-          ROCM_VERSION: "6.1"
-          AMDGPU_VERSION: "6.1"
-        run: |
-            set -ex
-
-            sudo apt-get update
-            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
-
-            sudo apt update
-            wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
-            sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
-            sudo apt update
-
-            sudo amdgpu-install --usecase=rocm
-
-            sudo apt-get clean
-            sudo rm -rf /var/lib/apt/lists/*
-            sudo ldconfig
-      - name: Cache grpc
-        id: cache-grpc
-        uses: actions/cache@v4
-        with:
-          path: grpc
-          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
-      - name: Build grpc
-        if: steps.cache-grpc.outputs.cache-hit != 'true'
-        run: |
-          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && sed -i "216i\  TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
-          cd cmake/build && cmake -DgRPC_INSTALL=ON \
-            -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make --jobs 5 --output-sync=target
-      - name: Install gRPC
-        run: |
-          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
-      # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
-      - name: Build
-        id: build
-        run: |
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          export PATH=$PATH:$GOPATH/bin
-          export PATH=/usr/local/cuda/bin:$PATH
-          export PATH=/opt/rocm/bin:$PATH
-          source /opt/intel/oneapi/setvars.sh
-          sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
-          make -j4 dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-linux
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.22
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-
-
-  build-macOS-x86_64:
-    runs-on: macos-13
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc
-          make install-go-tools
-      - name: Build
-        id: build
-        run: |
-          export C_INCLUDE_PATH=/usr/local/include
-          export CPLUS_INCLUDE_PATH=/usr/local/include
-          export PATH=$PATH:$GOPATH/bin
-          export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
-          make dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-MacOS-x86_64
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.22
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-
-  build-macOS-arm64:
-    runs-on: macos-14
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc libomp llvm
-          make install-go-tools
-      - name: Build
-        id: build
-        run: |
-          export C_INCLUDE_PATH=/usr/local/include
-          export CPLUS_INCLUDE_PATH=/usr/local/include
-          export PATH=$PATH:$GOPATH/bin
-          export CC=/opt/homebrew/opt/llvm/bin/clang
-          make dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-MacOS-arm64
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.22
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -75,7 +75,6 @@ jobs:
          rm protoc.zip
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install github.com/GeertJohan/go.rice/rice@latest
          PATH="$PATH:$HOME/go/bin" make protogen-go
      - name: Dependencies
        run: |
@ -103,7 +102,7 @@ jobs:

          make -C backend/python/transformers

-          make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
+          make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
        env:
          CUDA_VERSION: 12-4
      - name: Test
@ -164,11 +163,10 @@ jobs:
          rm protoc.zip
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install github.com/GeertJohan/go.rice/rice@latest
          PATH="$PATH:$HOME/go/bin" make protogen-go
      - name: Test
        run: |
-            PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
+            PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.22
@ -199,11 +197,10 @@ jobs:
        run: |
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
          pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
-          go install github.com/GeertJohan/go.rice/rice@latest
      - name: Build llama-cpp-darwin
        run: |
          make protogen-go
-          make build-api
+          make build
          bash scripts/build-llama-cpp-darwin.sh
          ls -la build/darwin.tar
          mv build/darwin.tar build/llama-cpp.tar
--- a/.gitignore
+++ b/.gitignore
@ -10,6 +10,8 @@ prepare-sources
 /backend/cpp/llama-*
 !backend/cpp/llama-cpp
 /backends
+/backend-images
+/result.yaml

 *.log

--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@ -0,0 +1,33 @@
+version: 2
+before:
+  hooks:
+    - make protogen-go
+    - go mod tidy
+dist: release
+source:
+  enabled: true
+  name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
+builds:
+  -
+    env:
+      - CGO_ENABLED=0
+    ldflags:
+      - -s -w
+      - -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}"
+      - -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}"
+    goos:
+      - linux
+      - darwin
+      #- windows
+    goarch:
+      - amd64
+      - arm64
+archives:
+  - formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone
+    name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}
+checksum:
+  name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt'
+snapshot:
+  version_template: "{{ .Tag }}-next"
+changelog:
+  use: github-native
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -26,7 +26,7 @@
                "LOCALAI_P2P": "true",
                "LOCALAI_FEDERATED": "true"
            },
-            "buildFlags": ["-tags", "p2p tts", "-v"],
+            "buildFlags": ["-tags", "", "-v"],
            "envFile": "${workspaceFolder}/.env",
            "cwd": "${workspaceRoot}"
        }
--- a/10
+++ b/10
@ -142,10 +142,9 @@ EOT
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
 ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin

-# Install grpc compilers and rice
+# Install grpc compilers
 RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
-    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
-    go install github.com/GeertJohan/go.rice/rice@latest
+    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af

 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
@ -194,7 +193,7 @@ RUN apt-get update && \

 FROM build-requirements AS builder-base

-ARG GO_TAGS="p2p"
+ARG GO_TAGS=""
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
 ARG LD_FLAGS="-s -w"
@ -249,8 +248,7 @@ COPY ./pkg/utils ./pkg/utils
 COPY ./pkg/langchain ./pkg/langchain

 RUN ls -l ./
-RUN make backend-assets
-RUN make grpcs
+RUN make protogen-go

 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
 # Adjustments to the build process should likely be made here.
--- a/5
+++ b/5
@ -1,5 +0,0 @@
-VERSION 0.7
-
-build:
-    FROM DOCKERFILE -f Dockerfile .
-    SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
--- a/221
+++ b/221
@ -3,9 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai

-ONNX_VERSION?=1.20.0
-ONNX_ARCH?=x64
-ONNX_OS?=linux
+GORELEASER?=

 export BUILD_TYPE?=

@ -35,77 +33,33 @@ WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)

-UPX?=
-# check if upx exists
-ifeq (, $(shell which upx))
-	UPX=
-else
-	UPX=$(shell which upx)
-endif
-
 # Default Docker bridge IP
 E2E_BRIDGE_IP?=172.17.0.1

 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif
-# Detect if we are running on arm64
-ifneq (,$(findstring aarch64,$(shell uname -m)))
-	ONNX_ARCH=aarch64
-endif

 ifeq ($(OS),Darwin)
-	ONNX_OS=osx
-	ifneq (,$(findstring aarch64,$(shell uname -m)))
-		ONNX_ARCH=arm64
-	else ifneq (,$(findstring arm64,$(shell uname -m)))
-		ONNX_ARCH=arm64
-	else
-		ONNX_ARCH=x86_64
-	endif
-
 	ifeq ($(OSX_SIGNING_IDENTITY),)
 		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
 	endif
 endif

-ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
-ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
-ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
-ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
-# Use filter-out to remove the specified backends
-ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
+# check if goreleaser exists
+ifeq (, $(shell which goreleaser))
+	GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s --
+else
+	GORELEASER=$(shell which goreleaser)
+endif

-GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
 TEST_PATHS?=./api/... ./pkg/... ./core/...

-# If empty, then we build all
-ifeq ($(GRPC_BACKENDS),)
-	GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
-endif
-
-ifeq ($(BUILD_API_ONLY),true)
-	GRPC_BACKENDS=
-endif

 .PHONY: all test build vendor

 all: help

-sources/onnxruntime:
-	mkdir -p sources/onnxruntime
-	curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-	cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-	cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
-
-backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
-	cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
-ifeq ($(OS),Darwin)
-	mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
-else
-	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
-endif
-
 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
@ -116,58 +70,32 @@ clean: ## Remove build related file
 	rm -f prepare
 	rm -rf $(BINARY_NAME)
 	rm -rf release/
-	rm -rf backend-assets/*
-	$(MAKE) -C backend/cpp/grpc clean
 	$(MAKE) protogen-clean
 	rmdir pkg/grpc/proto || true

 clean-tests:
 	rm -rf test-models
 	rm -rf test-dir
-	rm -rf core/http/backend-assets
-
-clean-dc: clean
-	cp -r /build/backend-assets /workspace/backend-assets

 ## Install Go tools
 install-go-tools:
 	go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
 	go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-	go install github.com/GeertJohan/go.rice/rice@latest

 ## Build:
-build: backend-assets grpcs install-go-tools ## Build the project
+build: protogen-go install-go-tools ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
-	$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
-ifneq ($(BACKEND_LIBS),)
-	$(MAKE) backend-assets/lib
-	cp -f $(BACKEND_LIBS) backend-assets/lib/
-endif
 	rm -rf $(BINARY_NAME) || true
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
-	rice append --exec $(BINARY_NAME)

-build-api:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
-
-backend-assets/lib:
-	mkdir -p backend-assets/lib
+dev-dist:
+	$(GORELEASER) build --snapshot --clean

 dist:
-	GO_TAGS="p2p" $(MAKE) build
-	GO_TAGS="p2p" STATIC=true $(MAKE) build
-	mkdir -p release
-# if BUILD_ID is empty, then we don't append it to the binary name
-ifeq ($(BUILD_ID),)
-	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
-	shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
-else
-	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
-	shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
-endif
+	$(GORELEASER) build --clean

 osx-signed: build
 	codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
@ -185,8 +114,7 @@ test-models/testmodel.ggml:
 	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
 	cp tests/models_fixtures/* test-models

-prepare-test: grpcs
-	cp -rf backend-assets core/http
+prepare-test: protogen-go
 	cp tests/models_fixtures/* test-models

 ########################################################
@ -194,7 +122,7 @@ prepare-test: grpcs
 ########################################################

 ## Test targets
-test: test-models/testmodel.ggml grpcs
+test: test-models/testmodel.ggml protogen-go
 	@echo 'Running tests'
 	export GO_TAGS="debug"
 	$(MAKE) prepare-test
@ -204,17 +132,26 @@ test: test-models/testmodel.ggml grpcs
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion

-backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build-api
+backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
 	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"

-backends/piper: docker-build-piper docker-save-piper build-api
+backends/piper: docker-build-piper docker-save-piper build
 	./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"

-backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build-api
+backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
 	./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"

-backends/whisper: docker-build-whisper docker-save-whisper build-api
+backends/whisper: docker-build-whisper docker-save-whisper build
 	./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
+	
+backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
+
+backends/local-store: docker-build-local-store docker-save-local-store build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
+
+backends/huggingface: docker-build-huggingface docker-save-huggingface build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"

 ########################################################
 ## AIO tests
@ -243,7 +180,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .

 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@ -275,9 +212,7 @@ test-stablediffusion: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

-test-stores: backend-assets/grpc/local-store
-	mkdir -p tests/integration/backend-assets/grpc
-	cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
+test-stores:
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration

 test-container:
@ -310,10 +245,46 @@ protogen: protogen-go protogen-python
 .PHONY: protogen-clean
 protogen-clean: protogen-go-clean protogen-python-clean

+# Download the protoc 31.1 release binary matching the host OS/architecture
+# into $(CURDIR)/protoc. The target is the file itself, so the download is
+# skipped once ./protoc exists. Unsupported hosts abort with exit status 1.
+# curl runs with -f so an HTTP error fails here instead of handing unzip an error page.
+protoc:
+	@OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
+	ARCH_NAME=$$(uname -m); \
+	if [ "$$OS_NAME" = "darwin" ]; then \
+	  if [ "$$ARCH_NAME" = "arm64" ]; then \
+	    FILE=protoc-31.1-osx-aarch_64.zip; \
+	  elif [ "$$ARCH_NAME" = "x86_64" ]; then \
+	    FILE=protoc-31.1-osx-x86_64.zip; \
+	  else \
+	    echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \
+	  fi; \
+	elif [ "$$OS_NAME" = "linux" ]; then \
+	  if [ "$$ARCH_NAME" = "x86_64" ]; then \
+	    FILE=protoc-31.1-linux-x86_64.zip; \
+	  elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \
+	    FILE=protoc-31.1-linux-aarch_64.zip; \
+	  elif [ "$$ARCH_NAME" = "ppc64le" ]; then \
+	    FILE=protoc-31.1-linux-ppcle_64.zip; \
+	  elif [ "$$ARCH_NAME" = "s390x" ]; then \
+	    FILE=protoc-31.1-linux-s390_64.zip; \
+	  elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \
+	    FILE=protoc-31.1-linux-x86_32.zip; \
+	  else \
+	    echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \
+	  fi; \
+	else \
+	  echo "Unsupported OS: $$OS_NAME"; exit 1; \
+	fi; \
+	URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \
+	curl -fsSL "$$URL" -o protoc.zip && \
+	unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip
+
 .PHONY: protogen-go
-protogen-go: install-go-tools
+protogen-go: protoc install-go-tools
 	mkdir -p pkg/grpc/proto
-	protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
+	./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
    backend/backend.proto

 .PHONY: protogen-go-clean
@ -407,19 +374,6 @@ vllm-protogen:
 vllm-protogen-clean:
 	$(MAKE) -C backend/python/vllm protogen-clean

-## GRPC
-# Note: it is duplicated in the Dockerfile
-prepare-extra-conda-environments: protogen-python
-	$(MAKE) -C backend/python/bark
-	$(MAKE) -C backend/python/coqui
-	$(MAKE) -C backend/python/diffusers
-	$(MAKE) -C backend/python/chatterbox
-	$(MAKE) -C backend/python/faster-whisper
-	$(MAKE) -C backend/python/vllm
-	$(MAKE) -C backend/python/rerankers
-	$(MAKE) -C backend/python/transformers
-	$(MAKE) -C backend/python/kokoro
-	$(MAKE) -C backend/python/exllama2

 prepare-test-extra: protogen-python
 	$(MAKE) -C backend/python/transformers
@ -433,37 +387,6 @@ test-extra: prepare-test-extra
 	$(MAKE) -C backend/python/chatterbox test
 	$(MAKE) -C backend/python/vllm test

-backend-assets:
-	mkdir -p backend-assets
-ifeq ($(BUILD_API_ONLY),true)
-	touch backend-assets/keep
-endif
-
-
-backend-assets/grpc:
-	mkdir -p backend-assets/grpc
-
-backend-assets/grpc/huggingface: protogen-go backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/huggingface
-endif
-
-backend-assets/grpc/silero-vad: protogen-go backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/silero-vad
-endif
-
-backend-assets/grpc/local-store: backend-assets/grpc protogen-go
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/local-store
-endif
-
-grpcs: protogen-go $(GRPC_BACKENDS)
-
 DOCKER_IMAGE?=local-ai
 DOCKER_AIO_IMAGE?=local-ai-aio
 IMAGE_TYPE?=core
@ -506,7 +429,6 @@ docker-image-intel:
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
 		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .

 docker-image-intel-xpu:
@ -515,7 +437,6 @@ docker-image-intel-xpu:
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
 		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .

 ########################################################
@ -534,6 +455,24 @@ docker-build-bark-cpp:
 docker-build-piper:
 	docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper .

+docker-build-local-store:
+	docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store .
+
+docker-build-huggingface:
+	docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface .
+
+docker-save-huggingface: backend-images
+	docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
+
+docker-save-local-store: backend-images
+	docker save local-ai-backend:local-store -o backend-images/local-store.tar
+
+docker-build-silero-vad:
+	docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad .
+
+docker-save-silero-vad: backend-images
+	docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
+
 docker-save-piper: backend-images
 	docker save local-ai-backend:piper -o backend-images/piper.tar

--- a/assets.go
+++ b/assets.go
@ -1,15 +0,0 @@
-package main
-
-import (
-	rice "github.com/GeertJohan/go.rice"
-)
-
-var backendAssets *rice.Box
-
-func init() {
-	var err error
-	backendAssets, err = rice.FindBox("backend-assets")
-	if err != nil {
-		panic(err)
-	}
-}
--- a/backend/cpp/llama-cpp/run.sh
+++ b/backend/cpp/llama-cpp/run.sh
@ -44,7 +44,7 @@ fi
 if [ "$(uname)" == "Darwin" ]; then
 	DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
 else
-	LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

 # If there is a lib/ld.so, use it
--- a/backend/go/huggingface/Makefile
+++ b/backend/go/huggingface/Makefile
@ -0,0 +1,17 @@
+# Builds and packages the huggingface backend.
+# LD_FLAGS and GO_TAGS are not assigned here; make picks them up from the
+# environment or the command line when they are provided.
+GOCMD=go
+
+# Backend binary, compiled with cgo disabled.
+huggingface:
+	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./
+
+# `package` shares its name with the package/ directory that package.sh
+# creates, so without .PHONY make would consider it up to date after the
+# first run and never re-package.
+.PHONY: build package
+package:
+	bash package.sh
+
+build: huggingface package
--- a/backend/go/llm/langchain/langchain.go
+++ b/backend/go/llm/langchain/langchain.go
--- a/backend/go/llm/langchain/main.go
+++ b/backend/go/llm/langchain/main.go
--- a/backend/go/huggingface/package.sh
+++ b/backend/go/huggingface/package.sh
@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Copy the huggingface binary and its run.sh launcher into ./package
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+mkdir -p "$CURDIR/package"
+cp -avrf "$CURDIR/huggingface" "$CURDIR/package/"
+cp -rfv "$CURDIR/run.sh" "$CURDIR/package/"
--- a/backend/go/huggingface/run.sh
+++ b/backend/go/huggingface/run.sh
@ -0,0 +1,8 @@
+#!/bin/bash
+# Launcher for the huggingface backend: runs the binary that sits next to
+# this script, forwarding all arguments unchanged.
+set -ex
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+exec "$CURDIR/huggingface" "$@"
--- a/backend/go/local-store/Makefile
+++ b/backend/go/local-store/Makefile
@ -0,0 +1,17 @@
+# Builds and packages the local-store backend.
+# LD_FLAGS and GO_TAGS are not assigned here; make picks them up from the
+# environment or the command line when they are provided.
+GOCMD=go
+
+# Backend binary, compiled with cgo disabled.
+local-store:
+	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o local-store ./
+
+# `package` shares its name with the package/ directory that package.sh
+# creates, so without .PHONY make would consider it up to date after the
+# first run and never re-package.
+.PHONY: build package
+package:
+	bash package.sh
+
+build: local-store package
--- a/backend/go/local-store/debug.go
+++ b/backend/go/local-store/debug.go
--- a/backend/go/local-store/main.go
+++ b/backend/go/local-store/main.go
--- a/backend/go/local-store/package.sh
+++ b/backend/go/local-store/package.sh
@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Copy the local-store binary and its run.sh launcher into ./package
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+mkdir -p "$CURDIR/package"
+cp -avrf "$CURDIR/local-store" "$CURDIR/package/"
+cp -rfv "$CURDIR/run.sh" "$CURDIR/package/"
--- a/backend/go/local-store/production.go
+++ b/backend/go/local-store/production.go
--- a/backend/go/local-store/run.sh
+++ b/backend/go/local-store/run.sh
@ -0,0 +1,8 @@
+#!/bin/bash
+# Launcher for the local-store backend: runs the binary that sits next to
+# this script, forwarding all arguments unchanged.
+set -ex
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+exec "$CURDIR/local-store" "$@"
--- a/backend/go/local-store/store.go
+++ b/backend/go/local-store/store.go
@ -4,6 +4,7 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"container/heap"
+	"errors"
 	"fmt"
 	"math"
 	"slices"
@ -99,6 +100,9 @@ func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
 }

 func (s *Store) Load(opts *pb.ModelOptions) error {
+	if opts.Model != "" {
+		return errors.New("not implemented")
+	}
 	return nil
 }

@ -315,7 +319,7 @@ func isNormalized(k []float32) bool {

 	for _, v := range k {
 		v64 := float64(v)
-		sum += v64*v64
+		sum += v64 * v64
 	}

 	s := math.Sqrt(sum)
--- a/backend/go/silero-vad/Makefile
+++ b/backend/go/silero-vad/Makefile
@ -0,0 +1,58 @@
+# Builds and packages the silero-vad backend, fetching the ONNX Runtime
+# release that the binary is compiled and linked against.
+
+CURRENT_DIR=$(abspath ./)
+GOCMD=go
+
+ONNX_VERSION?=1.20.0
+ONNX_ARCH?=x64
+ONNX_OS?=linux
+
+# Detect if we are running on arm64
+ifneq (,$(findstring aarch64,$(shell uname -m)))
+	ONNX_ARCH=aarch64
+endif
+
+# NOTE(review): OS is not assigned in this file; presumably it is inherited
+# from the environment or the invoking make - confirm it is "Darwin" on macOS.
+ifeq ($(OS),Darwin)
+	ONNX_OS=osx
+	ifneq (,$(findstring aarch64,$(shell uname -m)))
+		ONNX_ARCH=arm64
+	else ifneq (,$(findstring arm64,$(shell uname -m)))
+		ONNX_ARCH=arm64
+	else
+		ONNX_ARCH=x86_64
+	endif
+endif
+
+# Download and unpack the ONNX Runtime release for the selected OS/arch.
+sources/onnxruntime:
+	mkdir -p sources/onnxruntime
+	curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
+	cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
+	cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
+
+# Copy the ONNX Runtime libraries into backend-assets/lib and rename the
+# fully versioned library (to .dylib on Darwin, to .so.1 otherwise).
+backend-assets/lib/libonnxruntime.so.1: sources/onnxruntime
+	mkdir -p backend-assets/lib
+	cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/
+ifeq ($(OS),Darwin)
+	mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
+else
+	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
+endif
+
+silero-vad: backend-assets/lib/libonnxruntime.so.1
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURRENT_DIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURRENT_DIR)/backend-assets/lib \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o silero-vad ./
+
+# `package` shares its name with the package/ directory that package.sh
+# creates, so without .PHONY make would consider it up to date after the
+# first run and never re-package.
+.PHONY: build package
+package:
+	bash package.sh
+
+build: silero-vad package
--- a/backend/go/silero-vad/main.go
+++ b/backend/go/silero-vad/main.go
--- a/backend/go/silero-vad/package.sh
+++ b/backend/go/silero-vad/package.sh
@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Package the silero-vad backend: copy the binary, run.sh and the ONNX Runtime
+# libraries into ./package, then bundle the dynamic loader and the system
+# libraries for the detected architecture.
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+# Create lib directory
+mkdir -p "$CURDIR/package/lib"
+
+cp -avrf "$CURDIR/silero-vad" "$CURDIR/package/"
+cp -avrf "$CURDIR/run.sh" "$CURDIR/package/"
+cp -rfLv "$CURDIR"/backend-assets/lib/* "$CURDIR/package/lib/"
+
+# Detect architecture and pick the matching loader and library directory
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+    # x86_64 architecture
+    echo "Detected x86_64 architecture, copying x86_64 libraries..."
+    LOADER=/lib64/ld-linux-x86-64.so.2
+    SYSLIBDIR=/lib/x86_64-linux-gnu
+elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
+    # ARM64 architecture
+    echo "Detected ARM64 architecture, copying ARM64 libraries..."
+    LOADER=/lib/ld-linux-aarch64.so.1
+    SYSLIBDIR=/lib/aarch64-linux-gnu
+else
+    echo "Error: Could not detect architecture"
+    exit 1
+fi
+
+# The loader is stored as lib/ld.so, the name run.sh checks for.
+cp -arfLv "$LOADER" "$CURDIR/package/lib/ld.so"
+
+# System libraries bundled next to the binary; each one is copied once.
+SYSLIBS="libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0"
+for lib in $SYSLIBS; do
+    cp -arfLv "$SYSLIBDIR/$lib" "$CURDIR/package/lib/$lib"
+done
+
+echo "Packaging completed successfully"
+ls -liah "$CURDIR/package/"
+ls -liah "$CURDIR/package/lib/"
--- a/backend/go/silero-vad/run.sh
+++ b/backend/go/silero-vad/run.sh
@ -0,0 +1,19 @@
+#!/bin/bash
+# Launcher for the silero-vad backend. Puts the bundled lib/ directory on the
+# library search path and, when a bundled dynamic loader is present, starts
+# the binary through it.
+set -ex
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+# Append the previous value only when it is non-empty: a trailing ':' would
+# add an empty entry, which the loader resolves to the current directory.
+export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+
+# If there is a lib/ld.so, use it
+if [ -f "$CURDIR/lib/ld.so" ]; then
+	echo "Using lib/ld.so"
+	exec "$CURDIR/lib/ld.so" "$CURDIR/silero-vad" "$@"
+fi
+
+exec "$CURDIR/silero-vad" "$@"
--- a/backend/go/silero-vad/vad.go
+++ b/backend/go/silero-vad/vad.go
--- a/backend/index.yaml
+++ b/backend/index.yaml
@ -68,7 +68,7 @@
    default: "cpu-stablediffusion-ggml"
    nvidia: "cuda12-stablediffusion-ggml"
    intel: "intel-sycl-f16-stablediffusion-ggml"
-    #amd: "rocm-stablediffusion-ggml"
+    # amd: "rocm-stablediffusion-ggml"
    vulkan: "vulkan-stablediffusion-ggml"
    nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
    # metal: "metal-stablediffusion-ggml"
@ -285,6 +285,54 @@
  tags:
    - text-to-speech
    - TTS
+- &silero-vad
+  name: "silero-vad"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-silero-vad"
+  icon: https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png
+  urls:
+    - https://github.com/snakers4/silero-vad
+  description: |
+    Silero VAD: pre-trained enterprise-grade Voice Activity Detector.
+    Silero VAD is a voice activity detection model that can be used to detect whether a given audio contains speech or not.
+  tags:
+    - voice-activity-detection
+    - VAD
+    - silero-vad
+    - CPU
+- &local-store
+  name: "local-store"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-local-store"
+  urls:
+    - https://github.com/mudler/LocalAI
+  description: |
+    Local Store is a local-first, self-hosted, and open-source vector database.
+  tags:
+    - vector-database
+    - local-first
+    - open-source
+    - CPU
+  license: MIT
+- &huggingface
+  name: "huggingface"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface"
+  icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
+  urls:
+    - https://huggingface.co/docs/hub/en/api
+  description: |
+    HuggingFace is a backend which uses the huggingface API to run models.
+  tags:
+    - LLM
+    - huggingface
+  license: MIT
+- !!merge <<: *huggingface
+  name: "huggingface-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
+- !!merge <<: *local-store
+  name: "local-store-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
+- !!merge <<: *silero-vad
+  name: "silero-vad-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
 - !!merge <<: *piper
  name: "piper-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-piper"
--- a/core/application/startup.go
+++ b/core/application/startup.go
@ -9,9 +9,7 @@ import (
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
-	"github.com/mudler/LocalAI/pkg/assets"

-	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/mudler/LocalAI/pkg/model"
 	pkgStartup "github.com/mudler/LocalAI/pkg/startup"
 	"github.com/mudler/LocalAI/pkg/xsysinfo"
@ -103,23 +101,6 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}

-	if options.AssetsDestination != "" {
-		// Extract files from the embedded FS
-		err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
-		log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
-		if err != nil {
-			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
-		}
-	}
-
-	if options.LibPath != "" {
-		// If there is a lib directory, set LD_LIBRARY_PATH to include it
-		err := library.LoadExternal(options.LibPath)
-		if err != nil {
-			log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
-		}
-	}
-
 	// turn off any process that was started by GRPC if the context is canceled
 	go func() {
 		<-options.Context.Done()
--- a/core/backend/options.go
+++ b/core/backend/options.go
@ -20,7 +20,6 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...
 	defOpts := []model.Option{
 		model.WithBackendString(c.Backend),
 		model.WithModel(c.Model),
-		model.WithAssetDir(so.AssetsDestination),
 		model.WithContext(so.Context),
 		model.WithModelID(name),
 	}
--- a/core/backend/stores.go
+++ b/core/backend/stores.go
@ -7,14 +7,12 @@ import (
 	"github.com/mudler/LocalAI/pkg/model"
 )

-func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
-	if storeName == "" {
-		storeName = "default"
+func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string, backend string) (grpc.Backend, error) {
+	if backend == "" {
+		backend = model.LocalStoreBackend
 	}
-
 	sc := []model.Option{
-		model.WithBackendString(model.LocalStoreBackend),
-		model.WithAssetDir(appConfig.AssetsDestination),
+		model.WithBackendString(backend),
 		model.WithModel(storeName),
 	}

--- a/core/cli/context/context.go
+++ b/core/cli/context/context.go
@ -1,13 +1,6 @@
 package cliContext

-import (
-	rice "github.com/GeertJohan/go.rice"
-)
-
 type Context struct {
 	Debug    bool    `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
 	LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
-
-	// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
-	BackendAssets *rice.Box `kong:"-"`
 }
--- a/core/cli/run.go
+++ b/core/cli/run.go
@ -23,7 +23,6 @@ type RunCMD struct {
 	ExternalBackends             []string      `env:"LOCALAI_EXTERNAL_BACKENDS,EXTERNAL_BACKENDS" help:"A list of external backends to load from gallery on boot" group:"backends"`
 	BackendsPath                 string        `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
 	ModelsPath                   string        `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
-	BackendAssetsPath            string        `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
 	GeneratedContentPath         string        `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
 	UploadPath                   string        `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
 	ConfigPath                   string        `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
@ -46,7 +45,6 @@ type RunCMD struct {
 	Address                            string   `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
 	CORS                               bool     `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
 	CORSAllowOrigins                   string   `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
-	LibraryPath                        string   `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
 	CSRF                               bool     `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
 	UploadLimit                        int      `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
 	APIKeys                            []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
@ -99,10 +97,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithCors(r.CORS),
 		config.WithCorsAllowOrigins(r.CORSAllowOrigins),
 		config.WithCsrf(r.CSRF),
-		config.WithLibPath(r.LibraryPath),
 		config.WithThreads(r.Threads),
-		config.WithBackendAssets(ctx.BackendAssets),
-		config.WithBackendAssetsOutput(r.BackendAssetsPath),
 		config.WithUploadLimitMB(r.UploadLimit),
 		config.WithApiKeys(r.APIKeys),
 		config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
--- a/core/cli/soundgeneration.go
+++ b/core/cli/soundgeneration.go
@ -27,7 +27,6 @@ type SoundGenerationCMD struct {
 	DoSample               bool     `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
 	OutputFile             string   `short:"o" type:"path" help:"The path to write the output wav file"`
 	ModelsPath             string   `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
-	BackendAssetsPath      string   `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
 	ExternalGRPCBackends   []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
 }

@ -51,11 +50,10 @@ func parseToInt32Ptr(input string) *int32 {

 func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
 	outputFile := t.OutputFile
-	outputDir := t.BackendAssetsPath
+	outputDir := os.TempDir()
 	if outputFile != "" {
 		outputDir = filepath.Dir(outputFile)
 	}
-
 	text := strings.Join(t.Text, " ")

 	externalBackends := make(map[string]string)
@ -71,7 +69,6 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
 		ModelPath:            t.ModelsPath,
 		Context:              context.Background(),
 		GeneratedContentDir:  outputDir,
-		AssetsDestination:    t.BackendAssetsPath,
 		ExternalGRPCBackends: externalBackends,
 	}
 	ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
--- a/core/cli/transcript.go
+++ b/core/cli/transcript.go
@ -15,20 +15,18 @@ import (
 type TranscriptCMD struct {
 	Filename string `arg:""`

-	Backend           string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
-	Model             string `short:"m" required:"" help:"Model name to run the TTS"`
-	Language          string `short:"l" help:"Language of the audio file"`
-	Translate         bool   `short:"c" help:"Translate the transcription to english"`
-	Threads           int    `short:"t" default:"1" help:"Number of threads used for parallel computation"`
-	ModelsPath        string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
-	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+	Backend    string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
+	Model      string `short:"m" required:"" help:"Model name to run the TTS"`
+	Language   string `short:"l" help:"Language of the audio file"`
+	Translate  bool   `short:"c" help:"Translate the transcription to english"`
+	Threads    int    `short:"t" default:"1" help:"Number of threads used for parallel computation"`
+	ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 }

 func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
 	opts := &config.ApplicationConfig{
-		ModelPath:         t.ModelsPath,
-		Context:           context.Background(),
-		AssetsDestination: t.BackendAssetsPath,
+		ModelPath: t.ModelsPath,
+		Context:   context.Background(),
 	}

 	cl := config.NewBackendConfigLoader(t.ModelsPath)
--- a/core/cli/tts.go
+++ b/core/cli/tts.go
@ -17,18 +17,17 @@ import (
 type TTSCMD struct {
 	Text []string `arg:""`

-	Backend           string `short:"b" default:"piper" help:"Backend to run the TTS model"`
-	Model             string `short:"m" required:"" help:"Model name to run the TTS"`
-	Voice             string `short:"v" help:"Voice name to run the TTS"`
-	Language          string `short:"l" help:"Language to use with the TTS"`
-	OutputFile        string `short:"o" type:"path" help:"The path to write the output wav file"`
-	ModelsPath        string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
-	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+	Backend    string `short:"b" default:"piper" help:"Backend to run the TTS model"`
+	Model      string `short:"m" required:"" help:"Model name to run the TTS"`
+	Voice      string `short:"v" help:"Voice name to run the TTS"`
+	Language   string `short:"l" help:"Language to use with the TTS"`
+	OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
+	ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 }

 func (t *TTSCMD) Run(ctx *cliContext.Context) error {
 	outputFile := t.OutputFile
-	outputDir := t.BackendAssetsPath
+	outputDir := os.TempDir()
 	if outputFile != "" {
 		outputDir = filepath.Dir(outputFile)
 	}
@ -39,7 +38,6 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
 		ModelPath:           t.ModelsPath,
 		Context:             context.Background(),
 		GeneratedContentDir: outputDir,
-		AssetsDestination:   t.BackendAssetsPath,
 	}
 	ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)

--- a/core/cli/worker/worker.go
+++ b/core/cli/worker/worker.go
@ -1,7 +1,7 @@
 package worker

 type WorkerFlags struct {
-	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+	BackendsPath      string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
 	ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
 }

--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@ -9,8 +9,6 @@ import (

 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/pkg/assets"
-	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/rs/zerolog/log"
 )

@ -47,24 +45,17 @@ func findLLamaCPPBackend(backendSystemPath string) (string, error) {
 }

 func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
-	// Extract files from the embedded FS
-	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
-	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
-	if err != nil {
-		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
-	}

 	if len(os.Args) < 4 {
 		return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- <llama-rpc-server-args>")
 	}

-	grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
+	grpcProcess, err := findLLamaCPPBackend(r.BackendsPath)
 	if err != nil {
 		return err
 	}

 	args := strings.Split(r.ExtraLLamaCPPArgs, " ")
-	args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)

 	args = append([]string{grpcProcess}, args...)
 	return syscall.Exec(
--- a/core/cli/worker/worker_nop2p.go
+++ b/core/cli/worker/worker_nop2p.go
@ -1,16 +0,0 @@
-//go:build !p2p
-// +build !p2p
-
-package worker
-
-import (
-	"fmt"
-
-	cliContext "github.com/mudler/LocalAI/core/cli/context"
-)
-
-type P2P struct{}
-
-func (r *P2P) Run(ctx *cliContext.Context) error {
-	return fmt.Errorf("p2p mode is not enabled in this build")
-}
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
 package worker

 import (
@ -13,8 +10,6 @@ import (

 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/p2p"
-	"github.com/mudler/LocalAI/pkg/assets"
-	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/phayes/freeport"
 	"github.com/rs/zerolog/log"
 )
@ -29,12 +24,6 @@ type P2P struct {
 }

 func (r *P2P) Run(ctx *cliContext.Context) error {
-	// Extract files from the embedded FS
-	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
-	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
-	if err != nil {
-		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
-	}

 	// Check if the token is set
 	// as we always need it.
@ -71,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 			for {
 				log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)

-				grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
+				grpcProcess, err := findLLamaCPPBackend(r.BackendsPath)
 				if err != nil {
 					log.Error().Err(err).Msg("Failed to find llama-cpp-rpc-server")
 					return
@ -85,8 +74,6 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 				args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
 				log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args))

-				args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
-
 				cmd := exec.Command(
 					grpcProcess, args...,
 				)
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@ -6,7 +6,6 @@ import (
 	"regexp"
 	"time"

-	rice "github.com/GeertJohan/go.rice"
 	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 )
@ -17,7 +16,6 @@ type ApplicationConfig struct {
 	ModelPath                           string
 	BackendsPath                        string
 	ExternalBackends                    []string
-	LibPath                             string
 	UploadLimitMB, Threads, ContextSize int
 	F16                                 bool
 	Debug                               bool
@ -50,9 +48,6 @@ type ApplicationConfig struct {
 	Galleries        []Gallery
 	BackendGalleries []Gallery

-	BackendAssets     *rice.Box
-	AssetsDestination string
-
 	ExternalGRPCBackends map[string]string

 	AutoloadGalleries, AutoloadBackendGalleries bool
@ -140,12 +135,6 @@ func WithP2PToken(s string) AppOption {
 	}
 }

-func WithLibPath(path string) AppOption {
-	return func(o *ApplicationConfig) {
-		o.LibPath = path
-	}
-}
-
 var EnableWatchDog = func(o *ApplicationConfig) {
 	o.WatchDog = true
 }
@ -211,18 +200,6 @@ func WithCorsAllowOrigins(b string) AppOption {
 	}
 }

-func WithBackendAssetsOutput(out string) AppOption {
-	return func(o *ApplicationConfig) {
-		o.AssetsDestination = out
-	}
-}
-
-func WithBackendAssets(f *rice.Box) AppOption {
-	return func(o *ApplicationConfig) {
-		o.BackendAssets = f
-	}
-}
-
 func WithStringGalleries(galls string) AppOption {
 	return func(o *ApplicationConfig) {
 		if galls == "" {
--- a/core/gallery/models.go
+++ b/core/gallery/models.go
@ -126,8 +126,9 @@ func InstallModelFromGallery(
 		if err != nil {
 			return err
 		}
-
+		log.Debug().Msgf("Installed model %q", installedModel.Name)
 		if automaticallyInstallBackend && installedModel.Backend != "" {
+			log.Debug().Msgf("Installing backend %q", installedModel.Backend)
 			systemState, err := system.GetSystemState()
 			if err != nil {
 				return err
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@ -23,7 +23,6 @@ import (
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v3"

-	rice "github.com/GeertJohan/go.rice"
 	openaigo "github.com/otiai10/openaigo"
 	"github.com/sashabaranov/go-openai"
 	"github.com/sashabaranov/go-openai/jsonschema"
@ -264,16 +263,6 @@ func getRequest(url string, header http.Header) (error, int, []byte) {

 const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

-var backendAssets *rice.Box
-
-func init() {
-	var err error
-	backendAssets, err = rice.FindBox("backend-assets")
-	if err != nil {
-		panic(err)
-	}
-}
-
 var _ = Describe("API test", func() {

 	var app *fiber.App
@ -300,9 +289,6 @@ var _ = Describe("API test", func() {
 			modelDir = filepath.Join(tmpdir, "models")
 			err = os.Mkdir(modelDir, 0750)
 			Expect(err).ToNot(HaveOccurred())
-			backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
-			err = os.Mkdir(backendAssetsDir, 0750)
-			Expect(err).ToNot(HaveOccurred())

 			c, cancel = context.WithCancel(context.Background())

@ -341,8 +327,7 @@ var _ = Describe("API test", func() {
 					config.WithModelPath(modelDir),
 					config.WithBackendsPath(backendPath),
 					config.WithApiKeys([]string{apiKey}),
-					config.WithBackendAssets(backendAssets),
-					config.WithBackendAssetsOutput(backendAssetsDir))...)
+				)...)
 			Expect(err).ToNot(HaveOccurred())

 			app, err = API(application)
@ -545,8 +530,7 @@ var _ = Describe("API test", func() {
 					config.WithBackendsPath(backendPath),
 					config.WithGalleries(galleries),
 					config.WithModelPath(modelDir),
-					config.WithBackendAssets(backendAssets),
-					config.WithBackendAssetsOutput(tmpdir))...,
+				)...,
 			)
 			Expect(err).ToNot(HaveOccurred())
 			app, err = API(application)
@ -803,6 +787,10 @@ var _ = Describe("API test", func() {
 		})

 		It("shows the external backend", func() {
+			// Only run on linux
+			if runtime.GOOS != "linux" {
+				Skip("test supported only on linux")
+			}
 			// do an http request to the /system endpoint
 			resp, err := http.Get("http://127.0.0.1:9090/system")
 			Expect(err).ToNot(HaveOccurred())
@ -888,6 +876,13 @@ var _ = Describe("API test", func() {
 		// See tests/integration/stores_test
 		Context("Stores", Label("stores"), func() {

+			BeforeEach(func() {
+				// Only run on linux
+				if runtime.GOOS != "linux" {
+					Skip("test supported only on linux")
+				}
+			})
+
 			It("sets, gets, finds and deletes entries", func() {
 				ks := [][]float32{
 					{0.1, 0.2, 0.3},
--- a/core/http/endpoints/localai/stores.go
+++ b/core/http/endpoints/localai/stores.go
@ -17,7 +17,7 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
 			return err
 		}

-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+		sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
 		if err != nil {
 			return err
 		}
@ -45,7 +45,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
 			return err
 		}

-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+		sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
 		if err != nil {
 			return err
 		}
@ -67,7 +67,7 @@ func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
 			return err
 		}

-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+		sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
 		if err != nil {
 			return err
 		}
@ -99,7 +99,7 @@ func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConf
 			return err
 		}

-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+		sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
 		if err != nil {
 			return err
 		}
--- a/core/http/endpoints/localai/system.go
+++ b/core/http/endpoints/localai/system.go
@ -13,10 +13,7 @@ import (
 // @Router /system [get]
 func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
-		if err != nil {
-			return err
-		}
+		availableBackends := []string{}
 		loadedModels := ml.ListModels()
 		for b := range appConfig.ExternalGRPCBackends {
 			availableBackends = append(availableBackends, b)
--- a/core/http/endpoints/localai/welcome.go
+++ b/core/http/endpoints/localai/welcome.go
@ -5,7 +5,6 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/http/utils"
-	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/mudler/LocalAI/pkg/model"
@ -37,7 +36,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
 			"Models":            modelsWithoutConfig,
 			"ModelsConfig":      backendConfigs,
 			"GalleryConfig":     galleryConfigs,
-			"IsP2PEnabled":      p2p.IsP2PEnabled(),
 			"ApplicationConfig": appConfig,
 			"ProcessingModels":  processingModels,
 			"TaskTypes":         taskTypes,
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@ -6,7 +6,6 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http/endpoints/localai"
 	"github.com/mudler/LocalAI/core/http/middleware"
-	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
@ -80,10 +79,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
 	router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))

 	// p2p
-	if p2p.IsP2PEnabled() {
-		router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
-		router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
-	}
+	router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
+	router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))

 	router.Get("/version", func(c *fiber.Ctx) error {
 		return c.JSON(struct {
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@ -25,38 +25,39 @@ func RegisterUIRoutes(app *fiber.App,

 	app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))

-	if p2p.IsP2PEnabled() {
-		app.Get("/p2p", func(c *fiber.Ctx) error {
-			summary := fiber.Map{
-				"Title":   "LocalAI - P2P dashboard",
-				"BaseURL": utils.BaseURL(c),
-				"Version": internal.PrintableVersion(),
-				//"Nodes":          p2p.GetAvailableNodes(""),
-				//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
-				"IsP2PEnabled": p2p.IsP2PEnabled(),
-				"P2PToken":     appConfig.P2PToken,
-				"NetworkID":    appConfig.P2PNetworkID,
-			}
+	// P2P
+	app.Get("/p2p", func(c *fiber.Ctx) error {
+		summary := fiber.Map{
+			"Title":   "LocalAI - P2P dashboard",
+			"BaseURL": utils.BaseURL(c),
+			"Version": internal.PrintableVersion(),
+			//"Nodes":          p2p.GetAvailableNodes(""),
+			//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),

-			// Render index
-			return c.Render("views/p2p", summary)
-		})
+			"P2PToken":  appConfig.P2PToken,
+			"NetworkID": appConfig.P2PNetworkID,
+		}

-		/* show nodes live! */
-		app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
-			return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
-		})
-		app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
-			return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
-		})
+		// Render index
+		return c.Render("views/p2p", summary)
+	})

-		app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
-			return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
-		})
-		app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
-			return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
-		})
-	}
+	/* show nodes live! */
+	app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
+		return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
+	})
+	app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
+		return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
+	})
+
+	app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
+		return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
+	})
+	app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
+		return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
+	})
+
+	// End P2P

 	if !appConfig.DisableGalleryEndpoint {
 		registerGalleryRoutes(app, cl, appConfig, galleryService, processingOps)
@ -76,8 +77,8 @@ func RegisterUIRoutes(app *fiber.App,
 			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0],
-			"IsP2PEnabled": p2p.IsP2PEnabled(),
-			"Version":      internal.PrintableVersion(),
+
+			"Version": internal.PrintableVersion(),
 		}

 		// Render index
@ -121,7 +122,6 @@ func RegisterUIRoutes(app *fiber.App,
 			"ModelsConfig":        backendConfigs,
 			"Model":               modelThatCanBeUsed,
 			"Version":             internal.PrintableVersion(),
-			"IsP2PEnabled":        p2p.IsP2PEnabled(),
 		}

 		// Render index
@ -151,7 +151,6 @@ func RegisterUIRoutes(app *fiber.App,
 			"ModelsWithoutConfig": modelsWithoutConfig,
 			"Model":               c.Params("model"),
 			"Version":             internal.PrintableVersion(),
-			"IsP2PEnabled":        p2p.IsP2PEnabled(),
 		}

 		// Render index
@ -169,7 +168,6 @@ func RegisterUIRoutes(app *fiber.App,
 			"ModelsWithoutConfig": modelsWithoutConfig,
 			"Model":               c.Params("model"),
 			"Version":             internal.PrintableVersion(),
-			"IsP2PEnabled":        p2p.IsP2PEnabled(),
 		}

 		// Render index
@ -203,7 +201,6 @@ func RegisterUIRoutes(app *fiber.App,
 			"ModelsWithoutConfig": modelsWithoutConfig,
 			"Model":               modelThatCanBeUsed,
 			"Version":             internal.PrintableVersion(),
-			"IsP2PEnabled":        p2p.IsP2PEnabled(),
 		}

 		// Render index
@ -221,7 +218,6 @@ func RegisterUIRoutes(app *fiber.App,
 			"ModelsWithoutConfig": modelsWithoutConfig,
 			"Model":               c.Params("model"),
 			"Version":             internal.PrintableVersion(),
-			"IsP2PEnabled":        p2p.IsP2PEnabled(),
 		}

 		// Render index
@ -253,7 +249,6 @@ func RegisterUIRoutes(app *fiber.App,
 			"ModelsConfig":        backendConfigs,
 			"ModelsWithoutConfig": modelsWithoutConfig,
 			"Model":               modelThatCanBeUsed,
-			"IsP2PEnabled":        p2p.IsP2PEnabled(),
 			"Version":             internal.PrintableVersion(),
 		}

--- a/core/http/routes/ui_backend_gallery.go
+++ b/core/http/routes/ui_backend_gallery.go
@ -15,7 +15,6 @@ import (
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/http/elements"
 	"github.com/mudler/LocalAI/core/http/utils"
-	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/rs/zerolog/log"
@ -71,7 +70,6 @@ func registerBackendGalleryRoutes(app *fiber.App, appConfig *config.ApplicationC
 			"ProcessingBackends": processingBackendsData,
 			"AvailableBackends":  len(backends),
 			"TaskTypes":          taskTypes,
-			"IsP2PEnabled":       p2p.IsP2PEnabled(),
 		}

 		if page == "" {
--- a/core/http/routes/ui_gallery.go
+++ b/core/http/routes/ui_gallery.go
@ -15,7 +15,6 @@ import (
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/http/elements"
 	"github.com/mudler/LocalAI/core/http/utils"
-	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/rs/zerolog/log"
@ -70,9 +69,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
 			"AllTags":          tags,
 			"ProcessingModels": processingModelsData,
 			"AvailableModels":  len(models),
-			"IsP2PEnabled":     p2p.IsP2PEnabled(),
-
-			"TaskTypes": taskTypes,
+			"TaskTypes":        taskTypes,
 			//	"ApplicationConfig": appConfig,
 		}

--- a/core/http/views/explorer.html
+++ b/core/http/views/explorer.html
@ -268,7 +268,7 @@
                                    Command to connect (click to copy): 
                                </p>
                                <code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words"  @click="copyToken($el.textContent)" >
-                                    docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID=<span class="token" x-text="cluster.NetworkID"></span> -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="<span class="token" x-text="network.token"></span>" --net host -ti localai/localai:master-ffmpeg-core federated --debug
+                                    docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID=<span class="token" x-text="cluster.NetworkID"></span> -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="<span class="token" x-text="network.token"></span>" --net host -ti localai/localai:master federated --debug
                                </code>
                                or via CLI:
                                <code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words"  @click="copyToken($el.textContent)" >
--- a/core/http/views/p2p.html
+++ b/core/http/views/p2p.html
@ -49,11 +49,11 @@
            </div>
        
            <!-- Warning box if p2p token is empty and p2p is enabled -->
-            {{ if and .IsP2PEnabled (eq .P2PToken "") }}
+            {{ if eq .P2PToken "" }}
            <div class="bg-gradient-to-r from-red-800/70 to-red-700/70 border border-red-600/50 p-6 rounded-xl shadow-lg mb-10 text-left">
                <div class="flex items-center mb-2">
                    <i class="fa-solid fa-exclamation-triangle text-red-300 text-2xl mr-3"></i>
-                    <h3 class="text-xl font-bold text-white">Warning: P2P mode is disabled or no token was specified</h3>
+                    <h3 class="text-xl font-bold text-white">Warning: P2P token was not specified</h3>
                </div>
                <p class="mb-4 text-red-200">
                    You have to enable P2P mode by starting LocalAI with <code class="bg-red-900/50 px-2 py-0.5 rounded">--p2p</code>. Please restart the server with <code class="bg-red-900/50 px-2 py-0.5 rounded">--p2p</code> to generate a new token automatically that can be used to discover other nodes. If you already have a token, specify it with <code class="bg-red-900/50 px-2 py-0.5 rounded">export TOKEN=".."</code>
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@ -40,11 +40,9 @@
                <a href="talk/" class="text-gray-300 hover:text-white px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-blue-900/30 flex items-center">
                    <i class="fa-solid fa-phone text-blue-400 mr-2"></i>Talk
                </a>
-                {{ if .IsP2PEnabled }}
                <a href="p2p/" class="text-gray-300 hover:text-white px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-blue-900/30 flex items-center">
                    <i class="fa-solid fa-circle-nodes text-blue-400 mr-2"></i>Swarm
                </a>
-                {{ end }}
                <a href="swagger/" class="text-gray-300 hover:text-white px-3 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-blue-900/30 flex items-center">
                    <i class="fas fa-code text-blue-400 mr-2"></i>API
                </a>
@ -75,11 +73,9 @@
                <a href="talk/" class="block text-gray-300 hover:text-white hover:bg-blue-900/30 px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
                    <i class="fa-solid fa-phone text-blue-400 mr-3 w-5 text-center"></i>Talk
                </a>
-                {{ if .IsP2PEnabled }}
                <a href="p2p/" class="block text-gray-300 hover:text-white hover:bg-blue-900/30 px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
                    <i class="fa-solid fa-circle-nodes text-blue-400 mr-3 w-5 text-center"></i>Swarm
                </a>
-                {{ end }}
                <a href="swagger/" class="block text-gray-300 hover:text-white hover:bg-blue-900/30 px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center">
                    <i class="fas fa-code text-blue-400 mr-3 w-5 text-center"></i>API
                </a>
--- a/core/p2p/federated_server.go
+++ b/core/p2p/federated_server.go
@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
 package p2p

 import (
--- a/core/p2p/p2p.go
+++ b/core/p2p/p2p.go
@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
 package p2p

 import (
@ -65,10 +62,6 @@ func GenerateToken(DHTInterval, OTPInterval int) string {
 	return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
 }

-func IsP2PEnabled() bool {
-	return true
-}
-
 func nodeID(s string) string {
 	hostname, _ := os.Hostname()
 	return fmt.Sprintf("%s-%s", hostname, s)
--- a/core/p2p/p2p_disabled.go
+++ b/core/p2p/p2p_disabled.go
@ -1,35 +0,0 @@
-//go:build !p2p
-// +build !p2p
-
-package p2p
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/mudler/edgevpn/pkg/node"
-)
-
-func GenerateToken(DHTInterval, OTPInterval int) string {
-	return "not implemented"
-}
-
-func (f *FederatedServer) Start(ctx context.Context) error {
-	return fmt.Errorf("not implemented")
-}
-
-func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData), allocate bool) error {
-	return fmt.Errorf("not implemented")
-}
-
-func ExposeService(ctx context.Context, host, port, token, servicesID string) (*node.Node, error) {
-	return nil, fmt.Errorf("not implemented")
-}
-
-func IsP2PEnabled() bool {
-	return false
-}
-
-func NewNode(token string) (*node.Node, error) {
-	return nil, fmt.Errorf("not implemented")
-}
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@ -63,23 +63,29 @@ type VADResponse struct {
 	Segments []VADSegment `json:"segments" yaml:"segments"`
 }

+type StoreCommon struct {
+	Backend string `json:"backend,omitempty" yaml:"backend,omitempty"`
+}
 type StoresSet struct {
 	Store string `json:"store,omitempty" yaml:"store,omitempty"`

 	Keys   [][]float32 `json:"keys" yaml:"keys"`
 	Values []string    `json:"values" yaml:"values"`
+	StoreCommon
 }

 type StoresDelete struct {
 	Store string `json:"store,omitempty" yaml:"store,omitempty"`

 	Keys [][]float32 `json:"keys"`
+	StoreCommon
 }

 type StoresGet struct {
 	Store string `json:"store,omitempty" yaml:"store,omitempty"`

 	Keys [][]float32 `json:"keys" yaml:"keys"`
+	StoreCommon
 }

 type StoresGetResponse struct {
@ -92,6 +98,7 @@ type StoresFind struct {

 	Key  []float32 `json:"key" yaml:"key"`
 	Topk int       `json:"topk" yaml:"topk"`
+	StoreCommon
 }

 type StoresFindResponse struct {
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -5,7 +5,7 @@ services:
    # Available images with CUDA, ROCm, SYCL
    # Image list (quay.io): https://quay.io/repository/go-skynet/local-ai?tab=tags
    # Image list (dockerhub): https://hub.docker.com/r/localai/localai
-    image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+    image: quay.io/go-skynet/local-ai:master
    build:
      context: .
      dockerfile: Dockerfile
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@ -579,38 +579,14 @@ You can use 'Extra-Usage' request header key presence ('Extra-Usage: true') to r

 ### Extra backends

-LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default Images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI without only a core set of backends.
-
-If you wish to build a custom container image with extra backends, you can use the core images and build only the backends you are interested into or prepare the environment on startup by using the `EXTRA_BACKENDS` environment variable. For instance, to use the diffusers backend:
-
-```Dockerfile
-FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-
-RUN make -C backend/python/diffusers
-```
-
-Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or `--external-grpc-backends` as CLI flag) to point to the backends you are using (`EXTERNAL_GRPC_BACKENDS="backend_name:/path/to/backend"`), for example with diffusers:
-
-```Dockerfile
-FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-
-RUN make -C backend/python/diffusers
-
-ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/run.sh"
-```
-
-{{% alert note %}}
-
-You can specify remote external backends or path to local files. The syntax is `backend-name:/path/to/backend` or `backend-name:host:port`.
-
-{{% /alert %}}
+LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. See the [backend section](https://localai.io/backends/) for more details on how to install and build new backends for LocalAI.

 #### In runtime

 When using the `-core` container image it is possible to prepare the python backends you are interested into by using the `EXTRA_BACKENDS` variable, for instance:

 ```bash
-docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master-ffmpeg-core
+docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master
 ```

 ### Concurrent requests
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@ -73,8 +73,6 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta

 - CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
 - CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
-- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ...
-- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ...

 In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:

@ -259,7 +257,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/

 ### Container images

-To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ...
+To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16`, ...

 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).

@ -268,7 +266,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
 To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:

 ```bash
-docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080  -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080  -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32 phi-2
 ```

 ### Notes
@ -276,7 +274,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080  -
 In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:

 ```bash
-docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core
+docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16
 ```

 Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
--- a/docs/content/docs/features/embeddings.md
+++ b/docs/content/docs/features/embeddings.md
@ -44,7 +44,6 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
 {{% alert note %}}

 - The `sentencetransformers` backend is an optional backend of LocalAI and uses Python. If you are running `LocalAI` from the containers you are good to go and should be already configured for use.
-- If you are running `LocalAI` manually you must install the python dependencies (`make prepare-extra-conda-environments`). This requires `conda` to be installed.
 - For local execution, you also have to specify the extra backend in the `EXTERNAL_GRPC_BACKENDS` environment variable.
    - Example: `EXTERNAL_GRPC_BACKENDS="sentencetransformers:/path/to/LocalAI/backend/python/sentencetransformers/sentencetransformers.py"`
 - The `sentencetransformers` backend does support only embeddings of text, and not of tokens. If you need to embed tokens you can use the `bert` backend or `llama.cpp`.
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@ -18,8 +18,6 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA

 - Images ending with `-core` are smaller images without pre-downloaded Python dependencies. Use these images if you plan to use the `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
 - Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration.
- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features.
- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).

 {{% /alert %}}

--- a/docs/content/docs/getting-started/customize-model.md
+++ b/docs/content/docs/getting-started/customize-model.md
@ -23,7 +23,7 @@ MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branc
 Here's an example to initiate the **phi-2** model:

 ```bash
-docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
+docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
 ```

 You can also check all the embedded models configurations [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
@ -64,7 +64,7 @@ Then, launch LocalAI using your gist's URL:

 ```bash
 ## Important! Substitute with your gist's URL!
-docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/xxxx/phi-2.yaml
+docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/xxxx/phi-2.yaml
 ```

 ## Next Steps
--- a/gallery/alpaca.yaml
+++ b/gallery/alpaca.yaml
@ -2,6 +2,7 @@
 name: "alpaca"

 config_file: |
+  backend: "llama-cpp"
  context_size: 4096
  f16: true
  mmap: true
--- a/gallery/arch-function.yaml
+++ b/gallery/arch-function.yaml
@ -2,6 +2,7 @@
 name: "chatml"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  function:
    disable_no_action: true
--- a/gallery/chatml-hercules.yaml
+++ b/gallery/chatml-hercules.yaml
@ -2,6 +2,7 @@
 name: "chatml-hercules"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  function:
    # disable injecting the "answer" tool
--- a/gallery/chatml.yaml
+++ b/gallery/chatml.yaml
@ -2,6 +2,7 @@
 name: "chatml"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  template:
    chat_message: |
--- a/gallery/command-r.yaml
+++ b/gallery/command-r.yaml
@ -2,6 +2,7 @@
 name: "command-r"

 config_file: |
+  backend: "llama-cpp"
  context_size: 131072
  stopwords:
  - "<|END_OF_TURN_TOKEN|>"
--- a/gallery/deephermes.yaml
+++ b/gallery/deephermes.yaml
@ -2,6 +2,7 @@
 name: "deephermes"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  context_size: 8192
  stopwords:
--- a/gallery/deepseek-r1.yaml
+++ b/gallery/deepseek-r1.yaml
@ -2,6 +2,7 @@
 name: "deepseek-r1"

 config_file: |
+  backend: "llama-cpp"
  context_size: 131072
  mmap: true
  f16: true
--- a/gallery/deepseek.yaml
+++ b/gallery/deepseek.yaml
@ -2,6 +2,7 @@
 name: "deepseek"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  context_size: 8192
  template:
--- a/gallery/falcon3.yaml
+++ b/gallery/falcon3.yaml
@ -2,6 +2,7 @@
 name: "falcon3"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  template:
    chat_message: |
--- a/gallery/gemma.yaml
+++ b/gallery/gemma.yaml
@ -2,6 +2,7 @@
 name: "gemma"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  context_size: 8192
  template:
--- a/gallery/granite.yaml
+++ b/gallery/granite.yaml
@ -2,6 +2,7 @@
 name: "granite"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  template:
    chat_message: |
--- a/gallery/granite3-2.yaml
+++ b/gallery/granite3-2.yaml
@ -2,6 +2,7 @@
 name: "granite-3.2"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  template:
    chat_message: |
--- a/gallery/hermes-2-pro-mistral.yaml
+++ b/gallery/hermes-2-pro-mistral.yaml
@ -2,6 +2,7 @@
 name: "hermes-2-pro-mistral"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  context_size: 8192
  stopwords:
--- a/gallery/llama3-instruct.yaml
+++ b/gallery/llama3-instruct.yaml
@ -2,6 +2,7 @@
 name: "llama3-instruct"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  template:
    chat_message: |
--- a/gallery/llama3.1-instruct-grammar.yaml
+++ b/gallery/llama3.1-instruct-grammar.yaml
@ -2,6 +2,7 @@
 name: "llama3-instruct-grammar"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  function:
    disable_no_action: true
--- a/gallery/llama3.1-instruct.yaml
+++ b/gallery/llama3.1-instruct.yaml
@ -2,6 +2,7 @@
 name: "llama3-instruct"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  function:
    disable_no_action: true
--- a/gallery/llama3.1-reflective.yaml
+++ b/gallery/llama3.1-reflective.yaml
@ -2,6 +2,7 @@
 name: "llama3-instruct"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  cutstrings:
  - (.*?)</thinking>
--- a/gallery/llama3.2-fcall.yaml
+++ b/gallery/llama3.2-fcall.yaml
@ -2,6 +2,7 @@
 name: "llama3.2-fcall"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  function:
    json_regex_match:
--- a/gallery/llama3.2-quantized.yaml
+++ b/gallery/llama3.2-quantized.yaml
@ -2,6 +2,7 @@
 name: "llama3.2-quantized"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  function:
    disable_no_action: true
--- a/gallery/mathstral.yaml
+++ b/gallery/mathstral.yaml
@ -2,6 +2,7 @@
 name: "mathstral"

 config_file: |
+  backend: "llama-cpp"
  context_size: 8192
  mmap: true
  stopwords:
--- a/gallery/mistral-0.3.yaml
+++ b/gallery/mistral-0.3.yaml
@ -2,6 +2,7 @@
 name: "mistral-0.3"

 config_file: |
+  backend: "llama-cpp"
  context_size: 8192
  mmap: true
  stopwords:
--- a/gallery/moondream.yaml
+++ b/gallery/moondream.yaml
@ -3,6 +3,7 @@ name: "moondream2"


 config_file: |
+    backend: "llama-cpp"
    context_size: 2046
    roles:
      user: "\nQuestion: "
--- a/gallery/mudler.yaml
+++ b/gallery/mudler.yaml
@ -2,6 +2,7 @@
 name: localai

 config_file: |-
+  backend: "llama-cpp"
  context_size: 8192
  stopwords:
    - <|im_end|>
--- a/gallery/phi-2-chat.yaml
+++ b/gallery/phi-2-chat.yaml
@ -2,6 +2,7 @@
 name: "phi-2-chatml"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  template:
    chat_message: |
--- a/gallery/phi-2-orange.yaml
+++ b/gallery/phi-2-orange.yaml
@ -2,6 +2,7 @@
 name: "phi-2-orange"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  template:
    chat_message: |
--- a/gallery/phi-3-chat.yaml
+++ b/gallery/phi-3-chat.yaml
@ -2,6 +2,7 @@
 name: "phi-3-chat"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  template:
    chat_message: |
--- a/gallery/phi-4-chat-fcall.yaml
+++ b/gallery/phi-4-chat-fcall.yaml
@ -2,6 +2,7 @@
 name: "phi-4-chat"

 config_file: |
+  backend: "llama-cpp"
  mmap: true
  function:
    json_regex_match:
--- a/Show more
+++ b/Show more