mirror of
https://github.com/mudler/LocalAI
synced 2026-04-21 13:27:21 +00:00
feat: add cuda13 images (#7404)
* chore(ci): add cuda13 jobs
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Add to pipelines and to capabilities. Start to work on the gallery
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* gallery
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* capabilities: try to detect by looking at /usr/local
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* neutts
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* backends.yaml
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* add cuda13 l4t requirements.txt
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* add cuda13 requirements.txt
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Fixups
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Fixups
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Pin vllm
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Not all backends are compatible
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* add vllm to requirements
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* vllm is not pre-compiled for cuda 13
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
9872bdf455
commit
cfd95745ed
22 changed files with 631 additions and 76 deletions
303
.github/workflows/backend.yml
vendored
303
.github/workflows/backend.yml
vendored
|
|
@ -245,7 +245,6 @@ jobs:
|
|||
backend: "diffusers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
# CUDA 12 additional backends
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
|
|
@ -306,6 +305,247 @@ jobs:
|
|||
backend: "chatterbox"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "stablediffusion-ggml"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "whisper"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "rfdetr"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "exllama2"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-neutts'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "neutts"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
# cuda 13
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-rerankers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "rerankers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'true'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-llama-cpp'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "llama-cpp"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-transformers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "transformers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-diffusers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "diffusers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'l4t'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-l4t-diffusers'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
skip-drivers: 'true'
|
||||
backend: "diffusers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-kokoro'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "kokoro"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-bark'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "bark"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-chatterbox'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "chatterbox"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-stablediffusion-ggml'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "stablediffusion-ggml"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'true'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "stablediffusion-ggml"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "whisper"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'true'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-whisper'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "whisper"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-rfdetr'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "rfdetr"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
# hipblas builds
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
|
|
@ -625,18 +865,6 @@ jobs:
|
|||
backend: "stablediffusion-ggml"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "stablediffusion-ggml"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
|
|
@ -710,18 +938,6 @@ jobs:
|
|||
backend: "whisper"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "whisper"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
|
|
@ -846,18 +1062,6 @@ jobs:
|
|||
backend: "rfdetr"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "rfdetr"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
|
|
@ -907,18 +1111,6 @@ jobs:
|
|||
backend: "exllama2"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "exllama2"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
|
|
@ -1006,18 +1198,7 @@ jobs:
|
|||
backend: "neutts"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-neutts'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "neutts"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
|
|
|
|||
9
.github/workflows/image-pr.yml
vendored
9
.github/workflows/image-pr.yml
vendored
|
|
@ -43,6 +43,15 @@ jobs:
|
|||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
|
|
|
|||
21
.github/workflows/image.yml
vendored
21
.github/workflows/image.yml
vendored
|
|
@ -100,6 +100,17 @@ jobs:
|
|||
skip-drivers: 'false'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
aio: "-aio-gpu-nvidia-cuda-12"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
aio: "-aio-gpu-nvidia-cuda-13"
|
||||
- build-type: 'vulkan'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
|
|
@ -152,3 +163,13 @@ jobs:
|
|||
runs-on: 'ubuntu-24.04-arm'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
skip-drivers: 'true'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-cuda-13'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
skip-drivers: 'true'
|
||||
|
|
|
|||
|
|
@ -68,13 +68,13 @@ RUN <<EOT bash
|
|||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
echo "nvidia" > /run/localai/capability
|
||||
echo "nvidia-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability
|
||||
fi
|
||||
EOT
|
||||
|
||||
RUN <<EOT bash
|
||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
|
||||
echo "nvidia-l4t" > /run/localai/capability
|
||||
echo "nvidia-l4t-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability
|
||||
fi
|
||||
EOT
|
||||
|
||||
|
|
|
|||
15
Makefile
15
Makefile
|
|
@ -4,6 +4,9 @@ GOVET=$(GOCMD) vet
|
|||
BINARY_NAME=local-ai
|
||||
LAUNCHER_BINARY_NAME=local-ai-launcher
|
||||
|
||||
CUDA_MAJOR_VERSION?=13
|
||||
CUDA_MINOR_VERSION?=0
|
||||
|
||||
GORELEASER?=
|
||||
|
||||
export BUILD_TYPE?=
|
||||
|
|
@ -383,6 +386,9 @@ backends/llama-cpp-darwin: build
|
|||
backends/neutts: docker-build-neutts docker-save-neutts build
|
||||
./local-ai backends install "ocifile://$(abspath ./backend-images/neutts.tar)"
|
||||
|
||||
backends/vllm: docker-build-vllm docker-save-vllm build
|
||||
./local-ai backends install "ocifile://$(abspath ./backend-images/vllm.tar)"
|
||||
|
||||
build-darwin-python-backend: build
|
||||
bash ./scripts/build/python-darwin.sh
|
||||
|
||||
|
|
@ -448,6 +454,12 @@ docker-save-neutts: backend-images
|
|||
docker-build-kokoro:
|
||||
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend
|
||||
|
||||
docker-build-vllm:
|
||||
docker build --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm ./backend
|
||||
|
||||
docker-save-vllm: backend-images
|
||||
docker save local-ai-backend:vllm -o backend-images/vllm.tar
|
||||
|
||||
docker-save-kokoro: backend-images
|
||||
docker save local-ai-backend:kokoro -o backend-images/kokoro.tar
|
||||
|
||||
|
|
@ -484,9 +496,6 @@ docker-save-stablediffusion-ggml: backend-images
|
|||
docker-build-rerankers:
|
||||
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
|
||||
|
||||
docker-build-vllm:
|
||||
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm .
|
||||
|
||||
docker-build-transformers:
|
||||
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,10 @@
|
|||
vulkan: "vulkan-llama-cpp"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-llama-cpp"
|
||||
darwin-x86: "darwin-x86-llama-cpp"
|
||||
nvidia-cuda-13: "cuda13-llama-cpp"
|
||||
nvidia-cuda-12: "cuda12-llama-cpp"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp"
|
||||
- &whispercpp
|
||||
name: "whisper"
|
||||
alias: "whisper"
|
||||
|
|
@ -49,6 +53,10 @@
|
|||
amd: "rocm-whisper"
|
||||
vulkan: "vulkan-whisper"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-whisper"
|
||||
nvidia-cuda-13: "cuda13-whisper"
|
||||
nvidia-cuda-12: "cuda12-whisper"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisper"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-whisper"
|
||||
- &stablediffusionggml
|
||||
name: "stablediffusion-ggml"
|
||||
alias: "stablediffusion-ggml"
|
||||
|
|
@ -73,6 +81,10 @@
|
|||
vulkan: "vulkan-stablediffusion-ggml"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
|
||||
metal: "metal-stablediffusion-ggml"
|
||||
nvidia-cuda-13: "cuda13-stablediffusion-ggml"
|
||||
nvidia-cuda-12: "cuda12-stablediffusion-ggml"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-stablediffusion-ggml"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml"
|
||||
# darwin-x86: "darwin-x86-stablediffusion-ggml"
|
||||
- &rfdetr
|
||||
name: "rfdetr"
|
||||
|
|
@ -96,6 +108,9 @@
|
|||
#amd: "rocm-rfdetr"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-rfdetr"
|
||||
default: "cpu-rfdetr"
|
||||
nvidia-cuda-13: "cuda13-rfdetr"
|
||||
nvidia-cuda-12: "cuda12-rfdetr"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-rfdetr"
|
||||
- &vllm
|
||||
name: "vllm"
|
||||
license: apache-2.0
|
||||
|
|
@ -128,6 +143,7 @@
|
|||
nvidia: "cuda12-vllm"
|
||||
amd: "rocm-vllm"
|
||||
intel: "intel-vllm"
|
||||
nvidia-cuda-12: "cuda12-vllm"
|
||||
- &mlx
|
||||
name: "mlx"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx"
|
||||
|
|
@ -201,6 +217,8 @@
|
|||
nvidia: "cuda12-transformers"
|
||||
intel: "intel-transformers"
|
||||
amd: "rocm-transformers"
|
||||
nvidia-cuda-13: "cuda13-transformers"
|
||||
nvidia-cuda-12: "cuda12-transformers"
|
||||
- &diffusers
|
||||
name: "diffusers"
|
||||
icon: https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg
|
||||
|
|
@ -221,6 +239,10 @@
|
|||
nvidia-l4t: "nvidia-l4t-diffusers"
|
||||
metal: "metal-diffusers"
|
||||
default: "cpu-diffusers"
|
||||
nvidia-cuda-13: "cuda13-diffusers"
|
||||
nvidia-cuda-12: "cuda12-diffusers"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-diffusers"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-diffusers"
|
||||
- &exllama2
|
||||
name: "exllama2"
|
||||
urls:
|
||||
|
|
@ -236,6 +258,7 @@
|
|||
capabilities:
|
||||
nvidia: "cuda12-exllama2"
|
||||
intel: "intel-exllama2"
|
||||
nvidia-cuda-12: "cuda12-exllama2"
|
||||
- &faster-whisper
|
||||
icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
|
||||
description: |
|
||||
|
|
@ -252,6 +275,8 @@
|
|||
nvidia: "cuda12-faster-whisper"
|
||||
intel: "intel-faster-whisper"
|
||||
amd: "rocm-faster-whisper"
|
||||
nvidia-cuda-13: "cuda13-faster-whisper"
|
||||
nvidia-cuda-12: "cuda12-faster-whisper"
|
||||
- &kokoro
|
||||
icon: https://avatars.githubusercontent.com/u/166769057?v=4
|
||||
description: |
|
||||
|
|
@ -271,6 +296,9 @@
|
|||
intel: "intel-kokoro"
|
||||
amd: "rocm-kokoro"
|
||||
nvidia-l4t: "nvidia-l4t-kokoro"
|
||||
nvidia-cuda-13: "cuda13-kokoro"
|
||||
nvidia-cuda-12: "cuda12-kokoro"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-kokoro"
|
||||
- &coqui
|
||||
urls:
|
||||
- https://github.com/idiap/coqui-ai-TTS
|
||||
|
|
@ -292,6 +320,8 @@
|
|||
nvidia: "cuda12-coqui"
|
||||
intel: "intel-coqui"
|
||||
amd: "rocm-coqui"
|
||||
nvidia-cuda-13: "cuda13-coqui"
|
||||
nvidia-cuda-12: "cuda12-coqui"
|
||||
icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
|
||||
- &bark
|
||||
urls:
|
||||
|
|
@ -308,6 +338,8 @@
|
|||
cuda: "cuda12-bark"
|
||||
intel: "intel-bark"
|
||||
rocm: "rocm-bark"
|
||||
nvidia-cuda-13: "cuda13-bark"
|
||||
nvidia-cuda-12: "cuda12-bark"
|
||||
icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4
|
||||
- &barkcpp
|
||||
urls:
|
||||
|
|
@ -354,6 +386,10 @@
|
|||
metal: "metal-chatterbox"
|
||||
default: "cpu-chatterbox"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-chatterbox"
|
||||
nvidia-cuda-13: "cuda13-chatterbox"
|
||||
nvidia-cuda-12: "cuda12-chatterbox"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-chatterbox"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-chatterbox"
|
||||
- &piper
|
||||
name: "piper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
|
||||
|
|
@ -442,6 +478,8 @@
|
|||
nvidia: "cuda12-neutts"
|
||||
amd: "rocm-neutts"
|
||||
nvidia-l4t: "nvidia-l4t-neutts"
|
||||
nvidia-cuda-12: "cuda12-neutts"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-neutts"
|
||||
- !!merge <<: *neutts
|
||||
name: "neutts-development"
|
||||
capabilities:
|
||||
|
|
@ -449,6 +487,22 @@
|
|||
nvidia: "cuda12-neutts-development"
|
||||
amd: "rocm-neutts-development"
|
||||
nvidia-l4t: "nvidia-l4t-neutts-development"
|
||||
nvidia-cuda-12: "cuda12-neutts-development"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-neutts-development"
|
||||
- !!merge <<: *llamacpp
|
||||
name: "llama-cpp-development"
|
||||
capabilities:
|
||||
default: "cpu-llama-cpp-development"
|
||||
nvidia: "cuda12-llama-cpp-development"
|
||||
intel: "intel-sycl-f16-llama-cpp-development"
|
||||
amd: "rocm-llama-cpp-development"
|
||||
metal: "metal-llama-cpp-development"
|
||||
vulkan: "vulkan-llama-cpp-development"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-llama-cpp-development"
|
||||
nvidia-cuda-13: "cuda13-llama-cpp-development"
|
||||
nvidia-cuda-12: "cuda12-llama-cpp-development"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp-development"
|
||||
- !!merge <<: *neutts
|
||||
name: "cpu-neutts"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-neutts"
|
||||
|
|
@ -465,7 +519,7 @@
|
|||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-rocm-hipblas-neutts
|
||||
- !!merge <<: *neutts
|
||||
name: "nvidia-l4t-neutts"
|
||||
name: "nvidia-l4t-arm64-neutts"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-neutts"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-neutts
|
||||
|
|
@ -485,7 +539,7 @@
|
|||
mirrors:
|
||||
- localai/localai-backends:master-gpu-rocm-hipblas-neutts
|
||||
- !!merge <<: *neutts
|
||||
name: "nvidia-l4t-neutts-development"
|
||||
name: "nvidia-l4t-arm64-neutts-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-neutts"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-neutts
|
||||
|
|
@ -550,6 +604,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-llama-cpp
|
||||
- !!merge <<: *llamacpp
|
||||
name: "cuda13-nvidia-l4t-arm64-llama-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-llama-cpp
|
||||
- !!merge <<: *llamacpp
|
||||
name: "cuda13-nvidia-l4t-arm64-llama-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-llama-cpp
|
||||
- !!merge <<: *llamacpp
|
||||
name: "cpu-llama-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp"
|
||||
|
|
@ -630,6 +694,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-sycl-f16-llama-cpp
|
||||
- !!merge <<: *llamacpp
|
||||
name: "cuda13-llama-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-llama-cpp
|
||||
- !!merge <<: *llamacpp
|
||||
name: "cuda13-llama-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-llama-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-llama-cpp
|
||||
## whisper
|
||||
- !!merge <<: *whispercpp
|
||||
name: "nvidia-l4t-arm64-whisper"
|
||||
|
|
@ -641,6 +715,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-whisper
|
||||
- !!merge <<: *whispercpp
|
||||
name: "cuda13-nvidia-l4t-arm64-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-whisper
|
||||
- !!merge <<: *whispercpp
|
||||
name: "cuda13-nvidia-l4t-arm64-whisper-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-whisper
|
||||
- !!merge <<: *whispercpp
|
||||
name: "cpu-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper"
|
||||
|
|
@ -731,6 +815,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-sycl-f16-whisper
|
||||
- !!merge <<: *whispercpp
|
||||
name: "cuda13-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-whisper
|
||||
- !!merge <<: *whispercpp
|
||||
name: "cuda13-whisper-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-whisper
|
||||
## stablediffusion-ggml
|
||||
- !!merge <<: *stablediffusionggml
|
||||
name: "cpu-stablediffusion-ggml"
|
||||
|
|
@ -810,6 +904,26 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml
|
||||
- !!merge <<: *stablediffusionggml
|
||||
name: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml
|
||||
- !!merge <<: *stablediffusionggml
|
||||
name: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-stablediffusion-ggml"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-stablediffusion-ggml
|
||||
- !!merge <<: *stablediffusionggml
|
||||
name: "cuda13-stablediffusion-ggml"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-stablediffusion-ggml"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-stablediffusion-ggml
|
||||
- !!merge <<: *stablediffusionggml
|
||||
name: "cuda13-stablediffusion-ggml-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-stablediffusion-ggml"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-stablediffusion-ggml
|
||||
# vllm
|
||||
- !!merge <<: *vllm
|
||||
name: "vllm-development"
|
||||
|
|
@ -856,6 +970,7 @@
|
|||
#amd: "rocm-rfdetr-development"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development"
|
||||
default: "cpu-rfdetr-development"
|
||||
nvidia-cuda-13: "cuda13-rfdetr-development"
|
||||
- !!merge <<: *rfdetr
|
||||
name: "cuda12-rfdetr"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rfdetr"
|
||||
|
|
@ -876,6 +991,11 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-rfdetr"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-rfdetr
|
||||
- !!merge <<: *rfdetr
|
||||
name: "nvidia-l4t-arm64-rfdetr-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-rfdetr"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-rfdetr
|
||||
- !!merge <<: *rfdetr
|
||||
name: "cpu-rfdetr"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-rfdetr"
|
||||
|
|
@ -906,6 +1026,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-rfdetr"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-intel-rfdetr
|
||||
- !!merge <<: *rfdetr
|
||||
name: "cuda13-rfdetr"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-rfdetr"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-rfdetr
|
||||
- !!merge <<: *rfdetr
|
||||
name: "cuda13-rfdetr-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rfdetr"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-rfdetr
|
||||
## Rerankers
|
||||
- !!merge <<: *rerankers
|
||||
name: "rerankers-development"
|
||||
|
|
@ -913,6 +1043,7 @@
|
|||
nvidia: "cuda12-rerankers-development"
|
||||
intel: "intel-rerankers-development"
|
||||
amd: "rocm-rerankers-development"
|
||||
nvidia-cuda-13: "cuda13-rerankers-development"
|
||||
- !!merge <<: *rerankers
|
||||
name: "cuda11-rerankers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers"
|
||||
|
|
@ -953,6 +1084,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-rerankers"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-rerankers
|
||||
- !!merge <<: *rerankers
|
||||
name: "cuda13-rerankers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-rerankers"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-rerankers
|
||||
- !!merge <<: *rerankers
|
||||
name: "cuda13-rerankers-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rerankers"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-rerankers
|
||||
## Transformers
|
||||
- !!merge <<: *transformers
|
||||
name: "transformers-development"
|
||||
|
|
@ -960,6 +1101,7 @@
|
|||
nvidia: "cuda12-transformers-development"
|
||||
intel: "intel-transformers-development"
|
||||
amd: "rocm-transformers-development"
|
||||
nvidia-cuda-13: "cuda13-transformers-development"
|
||||
- !!merge <<: *transformers
|
||||
name: "cuda12-transformers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
|
||||
|
|
@ -1000,6 +1142,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-transformers"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-transformers
|
||||
- !!merge <<: *transformers
|
||||
name: "cuda13-transformers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-transformers"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-transformers
|
||||
- !!merge <<: *transformers
|
||||
name: "cuda13-transformers-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-transformers"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-transformers
|
||||
## Diffusers
|
||||
- !!merge <<: *diffusers
|
||||
name: "diffusers-development"
|
||||
|
|
@ -1010,6 +1162,7 @@
|
|||
nvidia-l4t: "nvidia-l4t-diffusers-development"
|
||||
metal: "metal-diffusers-development"
|
||||
default: "cpu-diffusers-development"
|
||||
nvidia-cuda-13: "cuda13-diffusers-development"
|
||||
- !!merge <<: *diffusers
|
||||
name: "cpu-diffusers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-diffusers"
|
||||
|
|
@ -1030,6 +1183,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-diffusers"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-l4t-diffusers
|
||||
- !!merge <<: *diffusers
|
||||
name: "cuda13-nvidia-l4t-arm64-diffusers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-diffusers"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-l4t-diffusers
|
||||
- !!merge <<: *diffusers
|
||||
name: "cuda13-nvidia-l4t-arm64-diffusers-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-diffusers"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-l4t-diffusers
|
||||
- !!merge <<: *diffusers
|
||||
name: "cuda12-diffusers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
|
||||
|
|
@ -1070,6 +1233,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-diffusers"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-diffusers
|
||||
- !!merge <<: *diffusers
|
||||
name: "cuda13-diffusers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-diffusers"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-diffusers
|
||||
- !!merge <<: *diffusers
|
||||
name: "cuda13-diffusers-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-diffusers"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-diffusers
|
||||
- !!merge <<: *diffusers
|
||||
name: "metal-diffusers"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-diffusers"
|
||||
|
|
@ -1164,6 +1337,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-rocm-hipblas-kokoro
|
||||
- !!merge <<: *kokoro
|
||||
name: "cuda13-kokoro"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-kokoro"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-kokoro
|
||||
- !!merge <<: *kokoro
|
||||
name: "cuda13-kokoro-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-kokoro"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-kokoro
|
||||
## faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "faster-whisper-development"
|
||||
|
|
@ -1171,6 +1354,7 @@
|
|||
nvidia: "cuda12-faster-whisper-development"
|
||||
intel: "intel-faster-whisper-development"
|
||||
amd: "rocm-faster-whisper-development"
|
||||
nvidia-cuda-13: "cuda13-faster-whisper-development"
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "cuda11-faster-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper"
|
||||
|
|
@ -1196,6 +1380,16 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "cuda13-faster-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "cuda13-faster-whisper-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-faster-whisper
|
||||
## coqui
|
||||
|
||||
- !!merge <<: *coqui
|
||||
|
|
@ -1303,6 +1497,10 @@
|
|||
metal: "metal-chatterbox-development"
|
||||
default: "cpu-chatterbox-development"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-chatterbox"
|
||||
nvidia-cuda-13: "cuda13-chatterbox-development"
|
||||
nvidia-cuda-12: "cuda12-chatterbox-development"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-chatterbox"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-chatterbox"
|
||||
- !!merge <<: *chatterbox
|
||||
name: "cpu-chatterbox"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"
|
||||
|
|
@ -1353,3 +1551,23 @@
|
|||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-12-chatterbox
|
||||
- !!merge <<: *chatterbox
|
||||
name: "cuda13-chatterbox"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-chatterbox"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-chatterbox
|
||||
- !!merge <<: *chatterbox
|
||||
name: "cuda13-chatterbox-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-chatterbox"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-chatterbox
|
||||
- !!merge <<: *chatterbox
|
||||
name: "cuda13-nvidia-l4t-arm64-chatterbox"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox
|
||||
- !!merge <<: *chatterbox
|
||||
name: "cuda13-nvidia-l4t-arm64-chatterbox-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-arm64-chatterbox"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-l4t-arm64-chatterbox
|
||||
|
|
|
|||
8
backend/python/chatterbox/requirements-cublas13.txt
Normal file
8
backend/python/chatterbox/requirements-cublas13.txt
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch
|
||||
torchaudio
|
||||
transformers
|
||||
numpy>=1.24.0,<1.26.0
|
||||
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
|
||||
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
|
||||
accelerate
|
||||
7
backend/python/chatterbox/requirements-l4t13.txt
Normal file
7
backend/python/chatterbox/requirements-l4t13.txt
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
--extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130
|
||||
torch
|
||||
torchaudio
|
||||
transformers
|
||||
numpy>=1.24.0,<1.26.0
|
||||
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
|
||||
accelerate
|
||||
|
|
@ -211,7 +211,7 @@ function init() {
|
|||
# - hipblas
|
||||
# - intel
|
||||
function getBuildProfile() {
|
||||
if [ x"${BUILD_TYPE:-}" == "xcublas" ]; then
|
||||
if [ x"${BUILD_TYPE:-}" == "xcublas" ] || [ x"${BUILD_TYPE:-}" == "xl4t" ]; then
|
||||
if [ ! -z "${CUDA_MAJOR_VERSION:-}" ]; then
|
||||
echo ${BUILD_TYPE}${CUDA_MAJOR_VERSION}
|
||||
else
|
||||
|
|
|
|||
|
|
@ -16,4 +16,11 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
# Use python 3.12 for l4t
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t12" ] || [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
||||
PYTHON_VERSION="3.12"
|
||||
PYTHON_PATCH="12"
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
|
|
|
|||
12
backend/python/diffusers/requirements-cublas13.txt
Normal file
12
backend/python/diffusers/requirements-cublas13.txt
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
git+https://github.com/huggingface/diffusers
|
||||
opencv-python
|
||||
transformers
|
||||
torchvision
|
||||
accelerate
|
||||
compel
|
||||
peft
|
||||
sentencepiece
|
||||
torch
|
||||
ftfy
|
||||
optimum-quanto
|
||||
12
backend/python/diffusers/requirements-l4t13.txt
Normal file
12
backend/python/diffusers/requirements-l4t13.txt
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
--extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130
|
||||
torch
|
||||
git+https://github.com/huggingface/diffusers
|
||||
transformers
|
||||
accelerate
|
||||
compel
|
||||
peft
|
||||
optimum-quanto
|
||||
numpy<2
|
||||
sentencepiece
|
||||
torchvision
|
||||
ftfy
|
||||
9
backend/python/faster-whisper/requirements-cublas13.txt
Normal file
9
backend/python/faster-whisper/requirements-cublas13.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch==2.9.1
|
||||
faster-whisper
|
||||
opencv-python
|
||||
accelerate
|
||||
compel
|
||||
peft
|
||||
sentencepiece
|
||||
optimum-quanto
|
||||
7
backend/python/kokoro/requirements-cublas13.txt
Normal file
7
backend/python/kokoro/requirements-cublas13.txt
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch==2.9.1
|
||||
torchaudio==2.9.1
|
||||
transformers
|
||||
accelerate
|
||||
kokoro
|
||||
soundfile
|
||||
5
backend/python/rerankers/requirements-cublas13.txt
Normal file
5
backend/python/rerankers/requirements-cublas13.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
transformers
|
||||
accelerate
|
||||
torch==2.9.1
|
||||
rerankers[transformers]
|
||||
8
backend/python/rfdetr/requirements-cublas13.txt
Normal file
8
backend/python/rfdetr/requirements-cublas13.txt
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch==2.9.1
|
||||
rfdetr
|
||||
opencv-python
|
||||
accelerate
|
||||
inference
|
||||
peft
|
||||
optimum-quanto
|
||||
9
backend/python/transformers/requirements-cublas13.txt
Normal file
9
backend/python/transformers/requirements-cublas13.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch==2.9.0
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==5.1.0
|
||||
protobuf==6.33.1
|
||||
|
|
@ -4,6 +4,7 @@ package system
|
|||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
|
|
@ -17,14 +18,32 @@ const (
|
|||
darwinX86 = "darwin-x86"
|
||||
metal = "metal"
|
||||
nvidia = "nvidia"
|
||||
amd = "amd"
|
||||
intel = "intel"
|
||||
|
||||
amd = "amd"
|
||||
intel = "intel"
|
||||
|
||||
nvidiaCuda13 = "nvidia-cuda-13"
|
||||
nvidiaCuda12 = "nvidia-cuda-12"
|
||||
nvidiaL4TCuda12 = "nvidia-l4t-cuda-12"
|
||||
nvidiaL4TCuda13 = "nvidia-l4t-cuda-13"
|
||||
|
||||
capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"
|
||||
capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"
|
||||
defaultRunFile = "/run/localai/capability"
|
||||
)
|
||||
|
||||
var (
	// cuda13DirExists records whether /usr/local/cuda-13 was present when the
	// package was initialised.
	cuda13DirExists bool
	// cuda12DirExists records whether /usr/local/cuda-12 was present when the
	// package was initialised.
	cuda12DirExists bool
)

// init probes the CUDA toolkit directories under /usr/local once at package
// load time and caches the results in cuda13DirExists and cuda12DirExists.
//
// The paths are anchored at the filesystem root on purpose: joining only
// "usr", "local", ... yields a path relative to the current working
// directory, which would make the probe depend on where the process was
// started instead of on the installed toolkit.
func init() {
	cuda13DirExists = pathExists(filepath.Join("/", "usr", "local", "cuda-13"))
	cuda12DirExists = pathExists(filepath.Join("/", "usr", "local", "cuda-12"))
}

// pathExists reports whether os.Stat succeeds for path. Any stat error
// (not found, permission denied, ...) is treated as "does not exist".
func pathExists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}
|
||||
|
||||
func (s *SystemState) Capability(capMap map[string]string) string {
|
||||
reportedCapability := s.getSystemCapabilities()
|
||||
|
||||
|
|
@ -77,12 +96,26 @@ func (s *SystemState) getSystemCapabilities() string {
|
|||
|
||||
// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t
|
||||
if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
|
||||
if s.GPUVendor == "nvidia" {
|
||||
if s.GPUVendor == nvidia {
|
||||
log.Info().Msgf("Using nvidia-l4t capability (arm64 on linux), set %s to override", capabilityEnv)
|
||||
if cuda13DirExists {
|
||||
return nvidiaL4TCuda13
|
||||
}
|
||||
if cuda12DirExists {
|
||||
return nvidiaL4TCuda12
|
||||
}
|
||||
return nvidiaL4T
|
||||
}
|
||||
}
|
||||
|
||||
if cuda13DirExists {
|
||||
return nvidiaCuda13
|
||||
}
|
||||
|
||||
if cuda12DirExists {
|
||||
return nvidiaCuda12
|
||||
}
|
||||
|
||||
if s.GPUVendor == "" {
|
||||
log.Info().Msgf("Default capability (no GPU detected), set %s to override", capabilityEnv)
|
||||
return defaultCapability
|
||||
|
|
@ -103,13 +136,13 @@ func detectGPUVendor(gpus []*gpu.GraphicsCard) (string, error) {
|
|||
if gpu.DeviceInfo != nil {
|
||||
if gpu.DeviceInfo.Vendor != nil {
|
||||
gpuVendorName := strings.ToUpper(gpu.DeviceInfo.Vendor.Name)
|
||||
if strings.Contains(gpuVendorName, "NVIDIA") {
|
||||
if strings.Contains(gpuVendorName, strings.ToUpper(nvidia)) {
|
||||
return nvidia, nil
|
||||
}
|
||||
if strings.Contains(gpuVendorName, "AMD") {
|
||||
if strings.Contains(gpuVendorName, strings.ToUpper(amd)) {
|
||||
return amd, nil
|
||||
}
|
||||
if strings.Contains(gpuVendorName, "INTEL") {
|
||||
if strings.Contains(gpuVendorName, strings.ToUpper(intel)) {
|
||||
return intel, nil
|
||||
}
|
||||
}
|
||||
|
|
@ -131,7 +164,7 @@ func (s *SystemState) BackendPreferenceTokens() []string {
|
|||
case strings.HasPrefix(capStr, amd):
|
||||
return []string{"rocm", "hip", "vulkan", "cpu"}
|
||||
case strings.HasPrefix(capStr, intel):
|
||||
return []string{"sycl", "intel", "cpu"}
|
||||
return []string{"sycl", intel, "cpu"}
|
||||
case strings.HasPrefix(capStr, metal):
|
||||
return []string{"metal", "cpu"}
|
||||
case strings.HasPrefix(capStr, darwinX86):
|
||||
|
|
|
|||
Loading…
Reference in a new issue