LocalAI/backend/python
Richard Palethorpe 4916f8c880
Some checks are pending
build backend container images / backend-jobs (ace-step, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-ace-step, 2404) (push) Waiting to run
build backend container images / backend-jobs (ace-step, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-ace-step, 2404) (push) Waiting to run
build backend container images / backend-jobs (ace-step, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-ace-step, 2404) (push) Waiting to run
build backend container images / backend-jobs (ace-step, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-ace-step, 2404) (push) Waiting to run
build backend container images / backend-jobs (ace-step, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-ace-step, 2404) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f16, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f16-acestep-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f32, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f32-acestep-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, ./, 12, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-arm64-acestep-cpp, 2204) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-rocm-hipblas-acestep-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-acestep-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-acestep-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-acestep-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-acestep-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (acestep-cpp, ubuntu:24.04, vulkan, ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -gpu-vulkan-acestep-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (chatterbox, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-arm64-chatterbox, 2204) (push) Waiting to run
build backend container images / backend-jobs (chatterbox, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-chatterbox, 2404) (push) Waiting to run
build backend container images / backend-jobs (chatterbox, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-chatterbox, 2404) (push) Waiting to run
build backend container images / backend-jobs (chatterbox, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-chatterbox, 2404) (push) Waiting to run
build backend container images / backend-jobs (chatterbox, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-chatterbox, 2404) (push) Waiting to run
build backend container images / backend-jobs (coqui, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-coqui, 2404) (push) Waiting to run
build backend container images / backend-jobs (coqui, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, bigger-runner, false, auto, -gpu-rocm-hipblas-coqui, 2404) (push) Waiting to run
build backend container images / backend-jobs (coqui, ubuntu:24.04, cublas, ./, 12, 9, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-coqui, 2404) (push) Waiting to run
build backend container images / backend-jobs (diffusers, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-diffusers, 2404) (push) Waiting to run
build backend container images / backend-jobs (diffusers, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-diffusers, 2204) (push) Waiting to run
build backend container images / backend-jobs (diffusers, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-diffusers, 2404) (push) Waiting to run
build backend container images / backend-jobs (diffusers, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-diffusers, 2404) (push) Waiting to run
build backend container images / backend-jobs (diffusers, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-diffusers, 2404) (push) Waiting to run
build backend container images / backend-jobs (diffusers, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-diffusers, 2404) (push) Waiting to run
build backend container images / backend-jobs (diffusers, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-diffusers, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-qwen3-tts, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-faster-qwen3-tts, 2204) (push) Waiting to run
build backend container images / backend-jobs (faster-qwen3-tts, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-faster-qwen3-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-qwen3-tts, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-faster-qwen3-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-qwen3-tts, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-faster-qwen3-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-whisper, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-faster-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-whisper, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-faster-whisper, 2204) (push) Waiting to run
build backend container images / backend-jobs (faster-whisper, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, bigger-runner, false, auto, -gpu-rocm-hipblas-faster-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-whisper, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, true, auto, -cpu-faster-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-whisper, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-faster-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-whisper, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-faster-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (faster-whisper, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-faster-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (fish-speech, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-fish-speech, 2404) (push) Waiting to run
build backend container images / backend-jobs (fish-speech, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-fish-speech, 2204) (push) Waiting to run
build backend container images / backend-jobs (fish-speech, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-fish-speech, 2404) (push) Waiting to run
build backend container images / backend-jobs (fish-speech, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-fish-speech, 2404) (push) Waiting to run
build backend container images / backend-jobs (kokoros, ubuntu:24.04, , ./, , , ./backend/Dockerfile.rust, linux/amd64, ubuntu-latest, false, auto, -cpu-kokoros, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f16, ./, , , ./backend/Dockerfile.llama-cpp, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f16-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.llama-cpp, linux/amd64, bigger-runner, false, auto, -gpu-nvidia-cuda-12-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.llama-cpp, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-mlx, 2404) (push) Waiting to run
build backend container images / backend-jobs (fish-speech, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-fish-speech, 2404) (push) Waiting to run
build backend container images / backend-jobs (fish-speech, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-fish-speech, 2404) (push) Waiting to run
build backend container images / backend-jobs (fish-speech, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-fish-speech, 2404) (push) Waiting to run
build backend container images / backend-jobs (ik-llama-cpp, ubuntu:24.04, , ./, , , ./backend/Dockerfile.ik-llama-cpp, linux/amd64, bigger-runner, false, auto, -cpu-ik-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (insightface, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-insightface, 2404) (push) Waiting to run
build backend container images / backend-jobs (insightface, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-insightface, 2404) (push) Waiting to run
build backend container images / backend-jobs (kitten-tts, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -kitten-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (kokoro, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-kokoro, 2404) (push) Waiting to run
build backend container images / backend-jobs (kokoro, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-kokoro, 2204) (push) Waiting to run
build backend container images / backend-jobs (kokoro, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-kokoro, 2404) (push) Waiting to run
build backend container images / backend-jobs (kokoro, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-kokoro, 2404) (push) Waiting to run
build backend container images / backend-jobs (kokoro, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-kokoro, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, intel/oneapi-basekit:2025.3.2-0-devel-ubuntu24.04, sycl_f32, ./, , , ./backend/Dockerfile.llama-cpp, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f32-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, ./, 12, 0, ./backend/Dockerfile.llama-cpp, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-arm64-llama-cpp, 2204) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.llama-cpp, linux/amd64, ubuntu-latest, false, auto, -gpu-rocm-hipblas-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, ubuntu:24.04, , ./, , , ./backend/Dockerfile.llama-cpp, linux/amd64,linux/arm64, bigger-runner, false, auto, -cpu-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.llama-cpp, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp, ubuntu:24.04, vulkan, ./, , , ./backend/Dockerfile.llama-cpp, linux/amd64,linux/arm64, bigger-runner, false, auto, -gpu-vulkan-llama-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (llama-cpp-quantization, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, true, auto, -cpu-llama-cpp-quantization, 2404) (push) Waiting to run
build backend container images / backend-jobs (local-store, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-local-store, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-mlx, 2204) (push) Waiting to run
build backend container images / backend-jobs (mlx, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-mlx, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-mlx, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-mlx, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-audio, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-mlx-audio, 2204) (push) Waiting to run
build backend container images / backend-jobs (mlx-audio, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-mlx-audio, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-audio, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-mlx-audio, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-audio, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-mlx-audio, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-audio, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-mlx-audio, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-distributed, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-mlx-distributed, 2204) (push) Waiting to run
build backend container images / backend-jobs (mlx-distributed, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-mlx-distributed, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-distributed, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-mlx-distributed, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-distributed, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-mlx-distributed, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-distributed, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-mlx-distributed, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-vlm, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-mlx-vlm, 2204) (push) Waiting to run
build backend container images / backend-jobs (mlx-vlm, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-mlx-vlm, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-vlm, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-mlx-vlm, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-vlm, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-mlx-vlm, 2404) (push) Waiting to run
build backend container images / backend-jobs (mlx-vlm, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-mlx-vlm, 2404) (push) Waiting to run
build backend container images / backend-jobs (moonshine, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-moonshine, 2404) (push) Waiting to run
build backend container images / backend-jobs (moonshine, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-moonshine, 2404) (push) Waiting to run
build backend container images / backend-jobs (moonshine, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-moonshine, 2404) (push) Waiting to run
build backend container images / backend-jobs (nemo, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-nemo, 2404) (push) Waiting to run
build backend container images / backend-jobs (nemo, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-nemo, 2404) (push) Waiting to run
build backend container images / backend-jobs (nemo, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-nemo, 2404) (push) Waiting to run
build backend container images / backend-jobs (neutts, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-neutts, 2404) (push) Waiting to run
build backend container images / backend-jobs (neutts, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-neutts, 2404) (push) Waiting to run
build backend container images / backend-jobs (neutts, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-neutts, 2404) (push) Waiting to run
build backend container images / backend-jobs (opus, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-opus, 2404) (push) Waiting to run
build backend container images / backend-jobs (outetts, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-outetts, 2404) (push) Waiting to run
build backend container images / backend-jobs (outetts, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-outetts, 2404) (push) Waiting to run
build backend container images / backend-jobs (pocket-tts, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-pocket-tts, 2204) (push) Waiting to run
build backend container images / backend-jobs (pocket-tts, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-pocket-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (pocket-tts, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-pocket-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs-darwin (coqui, mps, -metal-darwin-arm64-coqui) (push) Waiting to run
build backend container images / backend-jobs (nemo, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-nemo, 2404) (push) Waiting to run
build backend container images / backend-jobs (nemo, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-nemo, 2404) (push) Waiting to run
build backend container images / backend-jobs (piper, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -piper, 2404) (push) Waiting to run
build backend container images / backend-jobs (pocket-tts, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-pocket-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-asr, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-qwen-asr, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-asr, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-qwen-asr, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-asr, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-qwen-asr, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-asr, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-qwen-asr, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-asr, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-qwen-asr, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-tts, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-qwen-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-tts, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-qwen-tts, 2204) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, rocm/dev-ubuntu-24.04:6.4.4, hipblas, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-rocm-hipblas-qwen3-tts-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (rerankers, intel/oneapi-basekit:2025.3.2-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-rerankers, 2404) (push) Waiting to run
build backend container images / backend-jobs (rerankers, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-rocm-hipblas-rerankers, 2404) (push) Waiting to run
build backend container images / backend-jobs (rerankers, ubuntu:24.04, cublas, ./, 12, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-rerankers, 2404) (push) Waiting to run
build backend container images / backend-jobs (rfdetr, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-arm64-rfdetr, 2204) (push) Waiting to run
build backend container images / backend-jobs (rfdetr, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-rfdetr, 2404) (push) Waiting to run
build backend container images / backend-jobs (rfdetr, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-rfdetr, 2404) (push) Waiting to run
build backend container images / backend-jobs (pocket-tts, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-pocket-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (pocket-tts, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-pocket-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (pocket-tts, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-pocket-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-asr, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-qwen-asr, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-asr, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-qwen-asr, 2204) (push) Waiting to run
build backend container images / backend-jobs (qwen-tts, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-qwen-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-tts, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-qwen-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-tts, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-qwen-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-tts, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-qwen-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen-tts, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-qwen-tts, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f16, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f16-qwen3-tts-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f32, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f32-qwen3-tts-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, ./, 12, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-arm64-qwen3-tts-cpp, 2204) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-qwen3-tts-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-qwen3-tts-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-qwen3-tts-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-qwen3-tts-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (qwen3-tts-cpp, ubuntu:24.04, vulkan, ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -gpu-vulkan-qwen3-tts-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (rerankers, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-rerankers, 2404) (push) Waiting to run
build backend container images / backend-jobs (rfdetr, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-rfdetr, 2404) (push) Waiting to run
build backend container images / backend-jobs (sam3-cpp, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f16, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f16-sam3-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (sam3-cpp, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-sam3-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (sam3-cpp, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-sam3-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (sam3-cpp, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-sam3-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (sglang, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-sglang, 2404) (push) Waiting to run
build backend container images / backend-jobs (sglang, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-sglang, 2404) (push) Waiting to run
build backend container images / backend-jobs (sherpa-onnx, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-sherpa-onnx, 2404) (push) Waiting to run
build backend container images / backend-jobs (sherpa-onnx, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-sherpa-onnx, 2404) (push) Waiting to run
build backend container images / backend-jobs (sherpa-onnx, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-sherpa-onnx, 2404) (push) Waiting to run
build backend container images / backend-jobs (silero-vad, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-silero-vad, 2404) (push) Waiting to run
build backend container images / backend-jobs (speaker-recognition, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-speaker-recognition, 2404) (push) Waiting to run
build backend container images / backend-jobs (speaker-recognition, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-speaker-recognition, 2404) (push) Waiting to run
build backend container images / backend-jobs (stablediffusion-ggml, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f32, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f32-stablediffusion-ggml, 2404) (push) Waiting to run
build backend container images / backend-jobs (stablediffusion-ggml, nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, ./, 12, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-arm64-stablediffusion-ggml, 2204) (push) Waiting to run
build backend container images / backend-jobs (stablediffusion-ggml, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -cpu-stablediffusion-ggml, 2404) (push) Waiting to run
build backend container images / backend-jobs (stablediffusion-ggml, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-stablediffusion-ggml, 2404) (push) Waiting to run
build backend container images / backend-jobs (rfdetr, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-rfdetr, 2404) (push) Waiting to run
build backend container images / backend-jobs (sam3-cpp, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f32, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f32-sam3-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (sam3-cpp, nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, ./, 12, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-arm64-sam3-cpp, 2204) (push) Waiting to run
build backend container images / backend-jobs (sam3-cpp, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -cpu-sam3-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (sam3-cpp, ubuntu:24.04, vulkan, ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -gpu-vulkan-sam3-cpp, 2404) (push) Waiting to run
build backend container images / backend-jobs (sglang, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-sglang, 2404) (push) Waiting to run
build backend container images / backend-jobs (sglang, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-nvidia-cuda-12-sglang, 2404) (push) Waiting to run
build backend container images / backend-jobs (sglang, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-sglang, 2404) (push) Waiting to run
build backend container images / backend-jobs (stablediffusion-ggml, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f16, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f16-stablediffusion-ggml, 2404) (push) Waiting to run
build backend container images / backend-jobs (stablediffusion-ggml, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-stablediffusion-ggml, 2404) (push) Waiting to run
build backend container images / backend-jobs (stablediffusion-ggml, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-stablediffusion-ggml, 2404) (push) Waiting to run
build backend container images / backend-jobs (stablediffusion-ggml, ubuntu:24.04, vulkan, ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -gpu-vulkan-stablediffusion-ggml, 2404) (push) Waiting to run
build backend container images / backend-jobs (tinygrad, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -tinygrad, 2404) (push) Waiting to run
build backend container images / backend-jobs (transformers, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-transformers, 2404) (push) Waiting to run
build backend container images / backend-jobs (transformers, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-transformers, 2404) (push) Waiting to run
build backend container images / backend-jobs (transformers, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-transformers, 2404) (push) Waiting to run
build backend container images / backend-jobs (transformers, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-transformers, 2404) (push) Waiting to run
build backend container images / backend-jobs (trl, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-trl, 2404) (push) Waiting to run
build backend container images / backend-jobs (trl, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-trl, 2404) (push) Waiting to run
build backend container images / backend-jobs (trl, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-trl, 2404) (push) Waiting to run
build backend container images / backend-jobs (turboquant, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f16, ./, , , ./backend/Dockerfile.turboquant, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f16-turboquant, 2404) (push) Waiting to run
build backend container images / backend-jobs (turboquant, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f32, ./, , , ./backend/Dockerfile.turboquant, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f32-turboquant, 2404) (push) Waiting to run
build backend container images / backend-jobs (turboquant, nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, ./, 12, 0, ./backend/Dockerfile.turboquant, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-arm64-turboquant, 2204) (push) Waiting to run
build backend container images / backend-jobs (turboquant, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.turboquant, linux/amd64, ubuntu-latest, false, auto, -gpu-rocm-hipblas-turboquant, 2404) (push) Waiting to run
build backend container images / backend-jobs (turboquant, ubuntu:24.04, , ./, , , ./backend/Dockerfile.turboquant, linux/amd64,linux/arm64, bigger-runner, false, auto, -cpu-turboquant, 2404) (push) Waiting to run
build backend container images / backend-jobs (turboquant, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.turboquant, linux/amd64, bigger-runner, false, auto, -gpu-nvidia-cuda-12-turboquant, 2404) (push) Waiting to run
build backend container images / backend-jobs (turboquant, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.turboquant, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-turboquant, 2404) (push) Waiting to run
build backend container images / backend-jobs (turboquant, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.turboquant, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-turboquant, 2404) (push) Waiting to run
build backend container images / backend-jobs (turboquant, ubuntu:24.04, vulkan, ./, , , ./backend/Dockerfile.turboquant, linux/amd64,linux/arm64, bigger-runner, false, auto, -gpu-vulkan-turboquant, 2404) (push) Waiting to run
build backend container images / backend-jobs (vibevoice, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-vibevoice, 2404) (push) Waiting to run
build backend container images / backend-jobs (vibevoice, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-vibevoice, 2404) (push) Waiting to run
build backend container images / backend-jobs (vibevoice, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-vibevoice, 2404) (push) Waiting to run
build backend container images / backend-jobs (vibevoice, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-vibevoice, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm-omni, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-nvidia-cuda-13-vllm-omni, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm-omni, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-vllm-omni, 2404) (push) Waiting to run
build backend container images / backend-jobs (voxcpm, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-voxcpm, 2404) (push) Waiting to run
build backend container images / backend-jobs (voxcpm, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-voxcpm, 2404) (push) Waiting to run
build backend container images / backend-jobs (voxcpm, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -cpu-voxcpm, 2404) (push) Waiting to run
build backend container images / backend-jobs (voxcpm, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-voxcpm, 2404) (push) Waiting to run
build backend container images / backend-jobs (voxcpm, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-voxcpm, 2404) (push) Waiting to run
build backend container images / backend-jobs (voxtral, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-voxtral, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisper, nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, ./, 12, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-arm64-whisper, 2204) (push) Waiting to run
build backend container images / backend-jobs (whisper, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-rocm-hipblas-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisper, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisper, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisper, ubuntu:24.04, vulkan, ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -gpu-vulkan-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (vibevoice, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-vibevoice, 2204) (push) Waiting to run
build backend container images / backend-jobs (vibevoice, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-vibevoice, 2404) (push) Waiting to run
build backend container images / backend-jobs (vibevoice, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-vibevoice, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, intel, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-intel-vllm, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-vllm, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, true, auto, -cpu-vllm, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-nvidia-cuda-12-vllm, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-nvidia-cuda-13-vllm, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-vllm, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm-omni, rocm/dev-ubuntu-24.04:7.2.1, hipblas, ./, , , ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-rocm-hipblas-vllm-omni, 2404) (push) Waiting to run
build backend container images / backend-jobs (vllm-omni, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, arc-runner-set, false, auto, -gpu-nvidia-cuda-12-vllm-omni, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisper, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f16, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f16-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisper, intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04, sycl_f32, ./, , , ./backend/Dockerfile.golang, linux/amd64, ubuntu-latest, false, auto, -gpu-intel-sycl-f32-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisper, ubuntu:24.04, , ./, , , ./backend/Dockerfile.golang, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -cpu-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisper, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.golang, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-whisper, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisperx, ubuntu:24.04, l4t, ./, 13, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-cuda-13-arm64-whisperx, 2404) (push) Waiting to run
build backend container images / backend-jobs-darwin (ace-step, mps, -metal-darwin-arm64-ace-step) (push) Waiting to run
build backend container images / backend-jobs-darwin (acestep-cpp, metal, go, -metal-darwin-arm64-acestep-cpp) (push) Waiting to run
build backend container images / backend-jobs-darwin (chatterbox, mps, -metal-darwin-arm64-chatterbox) (push) Waiting to run
build backend container images / backend-jobs (whisperx, nvcr.io/nvidia/l4t-jetpack:r36.4.0, l4t, ./, 12, 0, ./backend/Dockerfile.python, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-whisperx, 2204) (push) Waiting to run
build backend container images / backend-jobs (whisperx, ubuntu:24.04, , ./, , , ./backend/Dockerfile.python, linux/amd64,linux/arm64, ubuntu-latest, true, auto, -cpu-whisperx, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisperx, ubuntu:24.04, cublas, ./, 12, 8, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12-whisperx, 2404) (push) Waiting to run
build backend container images / backend-jobs (whisperx, ubuntu:24.04, cublas, ./, 13, 0, ./backend/Dockerfile.python, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13-whisperx, 2404) (push) Waiting to run
build backend container images / backend-jobs-darwin (local-store, metal, go, -metal-darwin-arm64-local-store) (push) Waiting to run
build backend container images / backend-jobs-darwin (mlx, mps, -metal-darwin-arm64-mlx) (push) Waiting to run
build backend container images / backend-jobs-darwin (mlx-audio, mps, -metal-darwin-arm64-mlx-audio) (push) Waiting to run
build backend container images / backend-jobs-darwin (mlx-distributed, mps, -metal-darwin-arm64-mlx-distributed) (push) Waiting to run
build backend container images / backend-jobs-darwin (mlx-vlm, mps, -metal-darwin-arm64-mlx-vlm) (push) Waiting to run
build backend container images / backend-jobs-darwin (moonshine, mps, -metal-darwin-arm64-moonshine) (push) Waiting to run
build backend container images / backend-jobs-darwin (nemo, mps, -metal-darwin-arm64-nemo) (push) Waiting to run
build backend container images / backend-jobs-darwin (opus, metal, go, -metal-darwin-arm64-opus) (push) Waiting to run
build backend container images / backend-jobs-darwin (piper, metal, go, -metal-darwin-arm64-piper) (push) Waiting to run
build backend container images / backend-jobs-darwin (pocket-tts, mps, -metal-darwin-arm64-pocket-tts) (push) Waiting to run
build backend container images / backend-jobs-darwin (qwen-asr, mps, -metal-darwin-arm64-qwen-asr) (push) Waiting to run
build backend container images / backend-jobs-darwin (qwen-tts, mps, -metal-darwin-arm64-qwen-tts) (push) Waiting to run
build backend container images / backend-jobs-darwin (qwen3-tts-cpp, metal, go, -metal-darwin-arm64-qwen3-tts-cpp) (push) Waiting to run
build backend container images / backend-jobs-darwin (rerankers, mps, -metal-darwin-arm64-rerankers) (push) Waiting to run
build backend container images / backend-jobs-darwin (rfdetr, mps, -metal-darwin-arm64-rfdetr) (push) Waiting to run
build backend container images / backend-jobs-darwin (silero-vad, metal, go, -metal-darwin-arm64-silero-vad) (push) Waiting to run
build backend container images / backend-jobs-darwin (stablediffusion-ggml, metal, go, -metal-darwin-arm64-stablediffusion-ggml) (push) Waiting to run
build backend container images / backend-jobs-darwin (transformers, mps, -metal-darwin-arm64-transformers) (push) Waiting to run
build backend container images / backend-jobs-darwin (diffusers, mps, -metal-darwin-arm64-diffusers) (push) Waiting to run
build backend container images / backend-jobs-darwin (faster-whisper, mps, -metal-darwin-arm64-faster-whisper) (push) Waiting to run
build backend container images / backend-jobs-darwin (fish-speech, mps, -metal-darwin-arm64-fish-speech) (push) Waiting to run
build backend container images / backend-jobs-darwin (kitten-tts, mps, -metal-darwin-arm64-kitten-tts) (push) Waiting to run
build backend container images / backend-jobs-darwin (kokoro, mps, -metal-darwin-arm64-kokoro) (push) Waiting to run
build backend container images / backend-jobs-darwin (llama-cpp-quantization, mps, -metal-darwin-arm64-llama-cpp-quantization) (push) Waiting to run
build backend container images / backend-jobs-darwin (voxcpm, mps, -metal-darwin-arm64-voxcpm) (push) Waiting to run
build backend container images / backend-jobs-darwin (voxtral, metal, go, -metal-darwin-arm64-voxtral) (push) Waiting to run
build backend container images / backend-jobs-darwin (whisper, metal, go, -metal-darwin-arm64-whisper) (push) Waiting to run
build backend container images / llama-cpp-darwin-publish (push) Blocked by required conditions
Build test / build-test (push) Waiting to run
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.3.2-0-devel-ubuntu24.04, linux/amd64, arc-runner-set) (push) Waiting to run
Deploy docs to GitHub Pages / build (push) Waiting to run
Deploy docs to GitHub Pages / deploy (push) Blocked by required conditions
build container images / hipblas-jobs (rocm/dev-ubuntu-24.04:7.2.1, hipblas, --jobs=3 --output-sync=target, linux/amd64, ubuntu-latest, auto, -gpu-hipblas, noble, 2404) (push) Waiting to run
build container images / core-image-build (intel/oneapi-basekit:2025.3.2-0-devel-ubuntu24.04, intel, --jobs=3 --output-sync=target, linux/amd64, ubuntu-latest, auto, -gpu-intel, noble, 2404) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 13, 0, --jobs=4 --output-sync=target, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-13, noble, 2404) (push) Waiting to run
build container images / core-image-build (ubuntu:24.04, , --jobs=4 --output-sync=target, linux/amd64,linux/arm64, ubuntu-latest, false, auto, , noble, 2404) (push) Waiting to run
lint / golangci-lint (push) Waiting to run
Security Scan / tests (push) Waiting to run
Tests extras backends / tests-pocket-tts (push) Blocked by required conditions
Tests extras backends / tests-qwen-tts (push) Blocked by required conditions
Tests extras backends / tests-qwen-asr (push) Blocked by required conditions
Tests extras backends / tests-llama-cpp-quantization (push) Blocked by required conditions
build backend container images / backend-jobs-darwin (vibevoice, mps, -metal-darwin-arm64-vibevoice) (push) Waiting to run
build backend container images / backend-jobs-darwin (whisperx, mps, -metal-darwin-arm64-whisperx) (push) Waiting to run
build backend container images / llama-cpp-darwin (1.25.x) (push) Waiting to run
Build test / launcher-build-darwin (push) Waiting to run
Build test / launcher-build-linux (push) Waiting to run
Explorer deployment / build-linux (push) Waiting to run
GPU tests / ubuntu-latest (1.21.x) (push) Waiting to run
build container images / core-image-build (ubuntu:24.04, cublas, 12, 8, --jobs=4 --output-sync=target, linux/amd64, ubuntu-latest, false, auto, -gpu-nvidia-cuda-12, noble, 2404) (push) Waiting to run
build container images / core-image-build (ubuntu:24.04, vulkan, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, ubuntu-latest, false, auto, -gpu-vulkan, noble, 2404) (push) Waiting to run
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, auto, -nvidia-l4t-arm64, jammy, 2204) (push) Waiting to run
build container images / gh-runner (ubuntu:24.04, cublas, 13, 0, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, false, auto, -nvidia-l4t-arm64-cuda-13, noble, 2404) (push) Waiting to run
Tests extras backends / detect-changes (push) Waiting to run
Tests extras backends / tests-transformers (push) Blocked by required conditions
Tests extras backends / tests-rerankers (push) Blocked by required conditions
Tests extras backends / tests-diffusers (push) Blocked by required conditions
Tests extras backends / tests-coqui (push) Blocked by required conditions
Tests extras backends / tests-moonshine (push) Blocked by required conditions
Tests extras backends / tests-nemo (push) Blocked by required conditions
Tests extras backends / tests-voxcpm (push) Blocked by required conditions
Tests extras backends / tests-llama-cpp-grpc-transcription (push) Blocked by required conditions
Tests extras backends / tests-sherpa-onnx-grpc-tts (push) Blocked by required conditions
Tests extras backends / tests-ik-llama-cpp-grpc (push) Blocked by required conditions
Tests extras backends / tests-qwen3-tts-cpp (push) Blocked by required conditions
Tests extras backends / tests-voxtral (push) Blocked by required conditions
Tests extras backends / tests-kokoros (push) Blocked by required conditions
Tests extras backends / tests-insightface-grpc (push) Blocked by required conditions
Tests extras backends / tests-llama-cpp-grpc (push) Blocked by required conditions
Tests extras backends / tests-llama-cpp-smoke (push) Waiting to run
Tests extras backends / tests-sherpa-onnx-realtime (push) Blocked by required conditions
Tests extras backends / tests-sherpa-onnx-grpc-transcription (push) Blocked by required conditions
Tests extras backends / tests-turboquant-grpc (push) Blocked by required conditions
Tests extras backends / tests-acestep-cpp (push) Blocked by required conditions
Tests extras backends / tests-speaker-recognition-grpc (push) Blocked by required conditions
tests / tests-linux (1.26.x) (push) Waiting to run
tests / tests-apple (1.26.x) (push) Waiting to run
tests-aio / tests-aio (push) Waiting to run
E2E Backend Tests / tests-e2e-backend (1.25.x) (push) Waiting to run
UI E2E Tests / tests-ui-e2e (1.26.x) (push) Waiting to run
feat(vllm): expose AsyncEngineArgs via generic engine_args YAML map (#9563)
* feat(vllm): expose AsyncEngineArgs via generic engine_args YAML map

LocalAI's vLLM backend wraps a small typed subset of vLLM's
AsyncEngineArgs (quantization, tensor_parallel_size, dtype, etc.).
Anything outside that subset -- pipeline/data/expert parallelism,
speculative_config, kv_transfer_config, all2all_backend, prefix
caching, chunked prefill, etc. -- requires a new protobuf field, a
Go struct field, an options.go line, and a backend.py mapping per
feature. That cadence is the bottleneck on shipping vLLM's
production feature set.

Add a generic `engine_args:` map on the model YAML that is
JSON-serialised into a new ModelOptions.EngineArgs proto field and
applied verbatim to AsyncEngineArgs at LoadModel time. Validation
is done by the Python backend via dataclasses.fields(); unknown
keys fail with the closest valid name as a hint.
dataclasses.replace() is used so vLLM's __post_init__ re-runs and
auto-converts dict values into nested config dataclasses
(CompilationConfig, AttentionConfig, ...). speculative_config and
kv_transfer_config flow through as dicts; vLLM converts them at
engine init.

Operators can now write:

  engine_args:
    data_parallel_size: 8
    enable_expert_parallel: true
    all2all_backend: deepep_low_latency
    speculative_config:
      method: deepseek_mtp
      num_speculative_tokens: 3
    kv_cache_dtype: fp8

without further proto/Go/Python plumbing per field.

Production defaults seeded by hooks_vllm.go: enable_prefix_caching
and enable_chunked_prefill default to true unless explicitly set.

Existing typed YAML fields (gpu_memory_utilization,
tensor_parallel_size, etc.) remain for back-compat; engine_args
overrides them when both are set.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Richard Palethorpe <io@richiejp.com>

* chore(vllm): pin cublas13 to vLLM 0.20.0 cu130 wheel

vLLM's PyPI wheel is built against CUDA 12 (libcudart.so.12) and won't
load on a cu130 host. Switch the cublas13 build to vLLM's per-tag cu130
simple-index (https://wheels.vllm.ai/0.20.0/cu130/) and pin
vllm==0.20.0. The cu130-flavoured wheel ships libcudart.so.13 and
includes the DFlash speculative-decoding method that landed in 0.20.0.

cublas13 install gets --index-strategy=unsafe-best-match so uv consults
both the cu130 index and PyPI when resolving — PyPI also publishes
vllm==0.20.0, but with cu12 binaries that error at import time.

Verified: Qwen3.5-4B + z-lab/Qwen3.5-4B-DFlash loads and serves chat
completions on RTX 5070 Ti (sm_120, cu130).

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Richard Palethorpe <io@richiejp.com>

* ci(vllm): bot job to bump cublas13 vLLM wheel pin

vLLM's cu130 wheel index URL is itself version-locked
(wheels.vllm.ai/<TAG>/cu130/, no /latest/ alias upstream), so a vLLM
bump means rewriting two values atomically — the URL segment and the
version constraint. bump_deps.sh handles git-sha-in-Makefile only;
add a sibling bump_vllm_wheel.sh and a matching workflow job that
mirrors the existing matrix's PR-creation pattern.

The bumper queries /releases/latest (which excludes prereleases),
strips the leading 'v', and seds both lines unconditionally. When the
file is already on the latest tag the rewrite is a no-op and
peter-evans/create-pull-request opens no PR.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Richard Palethorpe <io@richiejp.com>

* docs(vllm): document engine_args and speculative decoding

The new engine_args: map plumbs arbitrary AsyncEngineArgs through to
vLLM, but the public docs only covered the basic typed fields. Add a
short subsection in the vLLM section explaining the typed/generic
split and showing a worked DFlash speculative-decoding config, with
pointers to vLLM's SpeculativeConfig reference and z-lab's drafter
collection.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-04-29 00:49:28 +02:00
..
ace-step feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
chatterbox feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
common feat: refactor shared helpers and enhance MLX backend functionality (#9335) 2026-04-13 18:44:03 +02:00
coqui chore(deps): bump packaging from 24.1 to 26.2 in /backend/python/coqui (#9594) 2026-04-28 08:44:53 +02:00
diffusers feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
faster-qwen3-tts feat: add distributed mode (#9124) 2026-03-30 00:47:27 +02:00
faster-whisper feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
fish-speech feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
insightface feat: add biometrics UI (#9524) 2026-04-24 08:50:34 +02:00
kitten-tts feat: add distributed mode (#9124) 2026-03-30 00:47:27 +02:00
kokoro feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
llama-cpp-quantization feat: add distributed mode (#9124) 2026-03-30 00:47:27 +02:00
mlx feat: refactor shared helpers and enhance MLX backend functionality (#9335) 2026-04-13 18:44:03 +02:00
mlx-audio feat: add distributed mode (#9124) 2026-03-30 00:47:27 +02:00
mlx-distributed feat: refactor shared helpers and enhance MLX backend functionality (#9335) 2026-04-13 18:44:03 +02:00
mlx-vlm fix(mlx-vlm): pin upstream to v0.4.4 to unblock CUDA builds (#9568) 2026-04-25 22:06:01 +02:00
moonshine feat: add distributed mode (#9124) 2026-03-30 00:47:27 +02:00
nemo feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
neutts feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
outetts feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
pocket-tts feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
qwen-asr feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
qwen-tts feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
rerankers feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
rfdetr feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
sglang feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang (#9553) 2026-04-25 12:26:29 +02:00
speaker-recognition feat: add biometrics UI (#9524) 2026-04-24 08:50:34 +02:00
tinygrad refactor(tinygrad): reuse tinygrad.apps.llm instead of vendored Transformer (#9380) 2026-04-16 22:41:18 +02:00
transformers chore(deps): bump sentence-transformers from 5.2.3 to 5.4.0 in /backend/python/transformers (#9342) 2026-04-14 00:30:27 +02:00
trl feat: add distributed mode (#9124) 2026-03-30 00:47:27 +02:00
vibevoice feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
vllm feat(vllm): expose AsyncEngineArgs via generic engine_args YAML map (#9563) 2026-04-29 00:49:28 +02:00
vllm-omni feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang (#9553) 2026-04-25 12:26:29 +02:00
voxcpm feat(rocm): bump to 7.x (#9323) 2026-04-12 08:51:30 +02:00
whisperx chore(whisperx): drop ROCm/hipblas build target (#9474) 2026-04-21 21:50:18 +02:00
README.md chore: drop bark which is unmaintained (#8207) 2026-01-25 09:26:40 +01:00

Python Backends for LocalAI

This directory contains Python-based AI backends for LocalAI, providing support for various AI models and hardware acceleration targets.

Overview

The Python backends use a unified build system based on libbackend.sh that provides:

  • Automatic virtual environment management with support for both uv and pip
  • Hardware-specific dependency installation (CPU, CUDA, Intel, MLX, etc.)
  • Portable Python support for standalone deployments
  • Consistent backend execution across different environments

Available Backends

Core AI Models

  • transformers - Hugging Face Transformers framework (PyTorch-based)
  • vllm - High-performance LLM inference engine
  • mlx - Apple Silicon optimized ML framework

Audio & Speech

  • coqui - Coqui TTS models
  • faster-whisper - Fast Whisper speech recognition
  • kitten-tts - Lightweight TTS
  • mlx-audio - Apple Silicon audio processing
  • chatterbox - TTS model
  • kokoro - TTS models

Computer Vision

  • diffusers - Stable Diffusion and image generation
  • mlx-vlm - Vision-language models for Apple Silicon
  • rfdetr - Object detection models

Specialized

  • rerankers - Text reranking models

Quick Start

Prerequisites

  • Python 3.10+ (default: 3.10.18)
  • uv package manager (recommended) or pip
  • Appropriate hardware drivers for your target (CUDA, Intel, etc.)

Installation

Each backend can be installed individually:

# Navigate to a specific backend
cd backend/python/transformers

# Install dependencies
make transformers
# or
bash install.sh

# Run the backend
make run
# or
bash run.sh

Using the Unified Build System

The libbackend.sh script provides consistent commands across all backends:

# Source the library in your backend script
source $(dirname $0)/../common/libbackend.sh

# Install requirements (automatically handles hardware detection)
installRequirements

# Start the backend server
startBackend $@

# Run tests
runUnittests

Hardware Targets

The build system automatically detects and configures for different hardware:

  • CPU - Standard CPU-only builds
  • CUDA - NVIDIA GPU acceleration (supports CUDA 12/13)
  • Intel - Intel XPU/GPU optimization
  • MLX - Apple Silicon (M1/M2/M3) optimization
  • HIP - AMD GPU acceleration

Target-Specific Requirements

Backends can specify hardware-specific dependencies:

  • requirements.txt - Base requirements
  • requirements-cpu.txt - CPU-specific packages
  • requirements-cublas12.txt - CUDA 12 packages
  • requirements-cublas13.txt - CUDA 13 packages
  • requirements-intel.txt - Intel-optimized packages
  • requirements-mps.txt - Apple Silicon packages

Configuration Options

Environment Variables

  • PYTHON_VERSION - Python version (default: 3.10)
  • PYTHON_PATCH - Python patch version (default: 18)
  • BUILD_TYPE - Force specific build target
  • USE_PIP - Use pip instead of uv (default: false)
  • PORTABLE_PYTHON - Enable portable Python builds
  • LIMIT_TARGETS - Restrict backend to specific targets

Example: CUDA 12 Only Backend

# In your backend script
LIMIT_TARGETS="cublas12"
source $(dirname $0)/../common/libbackend.sh

Example: Intel-Optimized Backend

# In your backend script
LIMIT_TARGETS="intel"
source $(dirname $0)/../common/libbackend.sh

Development

Adding a New Backend

  1. Create a new directory in backend/python/
  2. Copy the template structure from common/template/
  3. Implement your backend.py with the required gRPC interface
  4. Add appropriate requirements files for your target hardware
  5. Use libbackend.sh for consistent build and execution

Testing

# Run backend tests
make test
# or
bash test.sh

Building

# Install dependencies
make <backend-name>

# Clean build artifacts
make clean

Architecture

Each backend follows a consistent structure:

backend-name/
├── backend.py          # Main backend implementation
├── requirements.txt    # Base dependencies
├── requirements-*.txt  # Hardware-specific dependencies
├── install.sh         # Installation script
├── run.sh            # Execution script
├── test.sh           # Test script
├── Makefile          # Build targets
└── test.py           # Unit tests

Troubleshooting

Common Issues

  1. Missing dependencies: Ensure all requirements files are properly configured
  2. Hardware detection: Check that BUILD_TYPE matches your system
  3. Python version: Verify Python 3.10+ is available
  4. Virtual environment: Use ensureVenv to create/activate environments

Contributing

When adding new backends or modifying existing ones:

  1. Follow the established directory structure
  2. Use libbackend.sh for consistent behavior
  3. Include appropriate requirements files for all target hardware
  4. Add comprehensive tests
  5. Update this README if adding new backend types