fix(ci): better handling of latest images for backends (#5735)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-24 09:28:23 +00:00 · 2025-06-27 10:32:58 +02:00 · 2025-06-27 10:32:58 +02:00 · cfc9dfa3d5
commit cfc9dfa3d5
parent 6a650e68cb
2 changed files with 3 additions and 62 deletions
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@ -26,7 +26,6 @@ jobs:
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
      backend: ${{ matrix.backend }}
-      latest-image: ${{ matrix.latest-image }}
      dockerfile: ${{ matrix.dockerfile }}
      context: ${{ matrix.context }}
    secrets:
@ -49,7 +48,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "rerankers"
-            latest-image: 'latest-gpu-nvidia-cuda-11-rerankers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -61,7 +59,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "vllm"
-            latest-image: 'latest-gpu-nvidia-cuda-11-vllm'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -73,7 +70,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "transformers"
-            latest-image: 'latest-gpu-nvidia-cuda-11-transformers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -85,7 +81,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "diffusers"
-            latest-image: 'latest-gpu-nvidia-cuda-11-diffusers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # CUDA 11 additional backends
@ -98,7 +93,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "kokoro"
-            latest-image: 'latest-gpu-nvidia-cuda-11-kokoro'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -110,7 +104,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "faster-whisper"
-            latest-image: 'latest-gpu-nvidia-cuda-11-faster-whisper'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -122,7 +115,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "coqui"
-            latest-image: 'latest-gpu-nvidia-cuda-11-coqui'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -134,7 +126,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "bark"
-            latest-image: 'latest-gpu-nvidia-cuda-11-bark'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -146,7 +137,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "chatterbox"
-            latest-image: 'latest-gpu-nvidia-cuda-11-chatterbox'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # CUDA 12 builds
@ -159,7 +149,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "rerankers"
-            latest-image: 'latest-gpu-nvidia-cuda-12-rerankers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -171,7 +160,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "vllm"
-            latest-image: 'latest-gpu-nvidia-cuda-12-vllm'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -183,7 +171,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "transformers"
-            latest-image: 'latest-gpu-nvidia-cuda-12-transformers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -194,8 +181,7 @@ jobs:
            tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
-            backend: "diffusers"
-            latest-image: 'latest-gpu-nvidia-cuda-12-diffusers'
+            backend: "diffusers"            
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # CUDA 12 additional backends
@ -208,7 +194,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "kokoro"
-            latest-image: 'latest-gpu-nvidia-cuda-12-kokoro'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -220,7 +205,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "faster-whisper"
-            latest-image: 'latest-gpu-nvidia-cuda-12-faster-whisper'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -232,7 +216,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "coqui"
-            latest-image: 'latest-gpu-nvidia-cuda-12-coqui'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -244,7 +227,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "bark"
-            latest-image: 'latest-gpu-nvidia-cuda-12-bark'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
@ -256,7 +238,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "chatterbox"
-            latest-image: 'latest-gpu-nvidia-cuda-12-chatterbox'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # hipblas builds
@ -269,7 +250,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            backend: "rerankers"
-            latest-image: 'latest-gpu-rocm-hipblas-rerankers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
@ -281,7 +261,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            backend: "vllm"
-            latest-image: 'latest-gpu-rocm-hipblas-vllm'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
@ -293,7 +272,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            backend: "transformers"
-            latest-image: 'latest-gpu-rocm-hipblas-transformers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
@ -305,7 +283,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            backend: "diffusers"
-            latest-image: 'latest-gpu-rocm-hipblas-diffusers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # ROCm additional backends
@ -318,7 +295,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            backend: "kokoro"
-            latest-image: 'latest-gpu-rocm-hipblas-kokoro'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
@ -330,7 +306,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            backend: "faster-whisper"
-            latest-image: 'latest-gpu-rocm-hipblas-faster-whisper'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
@ -342,7 +317,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            backend: "coqui"
-            latest-image: 'latest-gpu-rocm-hipblas-coqui'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
@ -354,7 +328,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            backend: "bark"
-            latest-image: 'latest-gpu-rocm-hipblas-bark'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # sycl builds
@ -367,7 +340,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "rerankers"
-            latest-image: 'latest-gpu-intel-sycl-f32-rerankers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
@ -379,7 +351,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "rerankers"
-            latest-image: 'latest-gpu-intel-sycl-f16-rerankers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
@ -391,7 +362,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "vllm"
-            latest-image: 'latest-gpu-intel-sycl-f32-vllm'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
@ -403,7 +373,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "vllm"
-            latest-image: 'latest-gpu-intel-sycl-f16-vllm'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
@ -415,7 +384,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "transformers"
-            latest-image: 'latest-gpu-intel-sycl-f32-transformers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
@ -427,7 +395,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "transformers"
-            latest-image: 'latest-gpu-intel-sycl-f16-transformers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
@ -439,7 +406,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "diffusers"
-            latest-image: 'latest-gpu-intel-sycl-f32-diffusers'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # SYCL additional backends
@ -452,7 +418,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "kokoro"
-            latest-image: 'latest-gpu-intel-sycl-f32-kokoro'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
@ -464,7 +429,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "kokoro"
-            latest-image: 'latest-gpu-intel-sycl-f16-kokoro'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
@ -476,7 +440,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "faster-whisper"
-            latest-image: 'latest-gpu-intel-sycl-f32-faster-whisper'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
@ -488,7 +451,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "faster-whisper"
-            latest-image: 'latest-gpu-intel-sycl-f16-faster-whisper'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
@ -500,7 +462,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "coqui"
-            latest-image: 'latest-gpu-intel-sycl-f32-coqui'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
@ -512,7 +473,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "coqui"
-            latest-image: 'latest-gpu-intel-sycl-f16-coqui'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
@ -524,7 +484,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "bark"
-            latest-image: 'latest-gpu-intel-sycl-f32-bark'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
@ -536,7 +495,6 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            backend: "bark"
-            latest-image: 'latest-gpu-intel-sycl-f16-bark'
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # bark-cpp
@ -549,6 +507,5 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            backend: "bark"
-            latest-image: 'latest-bark-cpp'
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@ -28,10 +28,6 @@ on:
        description: 'Tag latest'
        default: ''
        type: string
-      latest-image:
-        description: 'Tag latest'
-        default: ''
-        type: string
      tag-suffix:
        description: 'Tag suffix'
        default: ''
@ -153,7 +149,7 @@ jobs:
            type=sha
          flavor: |
            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
+            suffix=${{ inputs.tag-suffix }},onlatest=true

      - name: Docker meta for PR
        id: meta_pull_request
@ -168,7 +164,7 @@ jobs:
            type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
          flavor: |
            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
+            suffix=${{ inputs.tag-suffix }},onlatest=true
 ## End testing image
      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
@ -210,7 +206,6 @@ jobs:
          cache-from: type=gha
          platforms: ${{ inputs.platforms }}
          push: ${{ github.event_name != 'pull_request' }}
-          load: ${{ github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

@ -233,18 +228,7 @@ jobs:
          tags: ${{ steps.meta_pull_request.outputs.tags }}
          labels: ${{ steps.meta_pull_request.outputs.labels }}

-      - name: Cleanup
-        run: |
-          docker builder prune -f
-          docker system prune --force --volumes --all

-      - name: Latest tag
-        if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
-        run: |
-          docker tag localai/localai-backends:${{ steps.meta.outputs.version }} localai/localai-backends:${{ inputs.latest-image }}
-          docker push localai/localai-backends:${{ inputs.latest-image }}
-          docker tag quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
-          docker push quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}

      - name: job summary
        run: |