perf: parallelize model downloads and switch to registry cache

- Parallelize all 14 model downloads using ThreadPoolExecutor (6 workers)
  Downloads were sequential (~30 min), now concurrent (~5-10 min)
- Switch Docker cache from type=gha to type=registry (GHCR)
  The GHA cache has a 10 GB limit, causing blob eviction and corrupted builds
  Registry cache has no size limit and persists across runner instances
- Add pip download cache mounts to all pip install layers
  Prevents re-downloading packages when layers rebuild
This commit is contained in:
ashim-hq 2026-04-17 14:54:01 +08:00
parent 2fd0c00564
commit 79c4ed6a35
4 changed files with 69 additions and 25 deletions

View file

@ -99,6 +99,13 @@ jobs:
- uses: docker/setup-buildx-action@v3
- name: Log in to GHCR (for registry cache)
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_TOKEN }}
- uses: docker/build-push-action@v6
with:
context: .
@ -106,5 +113,5 @@ jobs:
push: false
tags: ashim:ci
build-args: SKIP_MODEL_DOWNLOADS=true
cache-from: type=gha,scope=unified
cache-to: type=gha,mode=max,scope=unified
# Read both the CI cache (written below) and the release build's amd64 cache,
# so CI warms from its own previous runs as well as release-layer overlap.
# Without the cache-ci entry here, the cache-to export below is never read back.
cache-from: |
  type=registry,ref=ghcr.io/${{ github.repository }}:cache-ci
  type=registry,ref=ghcr.io/${{ github.repository }}:cache-linux-amd64
cache-to: type=registry,ref=ghcr.io/${{ github.repository }}:cache-ci,mode=max

View file

@ -121,8 +121,8 @@ jobs:
platforms: ${{ matrix.platform }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,"name=ashimhq/ashim,ghcr.io/${{ github.repository }}",push-by-digest=true,name-canonical=true,push=true
cache-from: type=gha,scope=${{ env.PLATFORM_PAIR }}
cache-to: type=gha,mode=max,scope=${{ env.PLATFORM_PAIR }}
cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:cache-${{ env.PLATFORM_PAIR }}
cache-to: type=registry,ref=ghcr.io/${{ github.repository }}:cache-${{ env.PLATFORM_PAIR }},mode=max
- name: Export digest
run: |

View file

@ -164,7 +164,8 @@ RUN apt-get -o Acquire::Retries=3 update && apt-get install -y --no-install-reco
COPY --from=caire-builder /tmp/caire /usr/local/bin/caire
# Python venv - Layer 1: Base packages (rarely change, ~3 GB)
RUN python3 -m venv /opt/venv && \
RUN --mount=type=cache,target=/root/.cache/pip \
python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --upgrade pip && \
/opt/venv/bin/pip install \
Pillow==11.1.0 \
@ -172,14 +173,16 @@ RUN python3 -m venv /opt/venv && \
opencv-python-headless==4.10.0.84
# Platform-conditional ONNX runtime
RUN if [ "$TARGETARCH" = "amd64" ]; then \
RUN --mount=type=cache,target=/root/.cache/pip \
if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install onnxruntime-gpu==1.20.1 \
; else \
/opt/venv/bin/pip install onnxruntime==1.20.1 \
; fi
# Python venv - Layer 2: Tool packages (change occasionally, ~2 GB)
RUN if [ "$TARGETARCH" = "amd64" ]; then \
RUN --mount=type=cache,target=/root/.cache/pip \
if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install rembg==2.0.62 && \
/opt/venv/bin/pip install realesrgan==0.3.0 \
--extra-index-url https://download.pytorch.org/whl/cu126 && \
@ -193,17 +196,20 @@ RUN if [ "$TARGETARCH" = "amd64" ]; then \
; fi
# mediapipe 0.10.21 only has amd64 wheels; arm64 maxes out at 0.10.18
RUN if [ "$TARGETARCH" = "amd64" ]; then \
RUN --mount=type=cache,target=/root/.cache/pip \
if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install mediapipe==0.10.21 \
; else \
/opt/venv/bin/pip install mediapipe==0.10.18 \
; fi
# CodeFormer face enhancement (install with --no-deps to avoid numpy 2.x conflict)
RUN /opt/venv/bin/pip install --no-deps codeformer-pip==0.0.4 lpips
RUN --mount=type=cache,target=/root/.cache/pip \
/opt/venv/bin/pip install --no-deps codeformer-pip==0.0.4 lpips
# Re-pin numpy to 1.26.4 in case any transitive dep upgraded it
RUN /opt/venv/bin/pip install numpy==1.26.4
RUN --mount=type=cache,target=/root/.cache/pip \
/opt/venv/bin/pip install numpy==1.26.4
# Pin rembg model storage to a fixed path so models downloaded at build time
# (as root) are found at runtime (as the non-root ashim user, home=/app).

View file

@ -638,21 +638,52 @@ def smoke_test():
def main():
    """Pre-download all ML models in parallel, then verify and smoke-test.

    Runs every download function concurrently (they are independent:
    separate target dirs, separate CDNs), collecting failures instead of
    aborting on the first one, so a single flaky CDN doesn't hide other
    errors. Exits with status 1 if any download failed.
    """
    import concurrent.futures

    print("Pre-downloading all ML models (parallel)...\n")
    # All download functions are independent (separate dirs, separate CDNs).
    # Run them in parallel to cut download time from ~30 min to ~5-10 min.
    download_fns = [
        download_lama_model,
        download_rembg_models,
        download_realesrgan_model,
        download_gfpgan_model,
        download_codeformer_model,
        download_ddcolor_model,
        download_codeformer_onnx_model,
        download_paddleocr_models,
        download_paddleocr_vl_model,
        download_scunet_model,
        download_nafnet_model,
        download_facexlib_models,
        download_opencv_colorize_models,
        download_mediapipe_task_models,
    ]
    # 6 workers balance parallelism with CDN rate limits.
    errors = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=6) as pool:
        future_to_name = {
            pool.submit(fn): fn.__name__ for fn in download_fns
        }
        for future in concurrent.futures.as_completed(future_to_name):
            name = future_to_name[future]
            try:
                future.result()
            except Exception as e:
                # Record and report, but keep draining the remaining futures
                # so one failure doesn't mask others.
                errors.append((name, e))
                print(f"\n*** {name} FAILED: {e}\n")
    if errors:
        print(f"\n{len(errors)} download(s) failed:")
        for name, e in errors:
            print(f"  {name}: {e}")
        sys.exit(1)
    print("\nAll downloads complete. Running verification...\n")
    verify_mediapipe()
    smoke_test()
    print("All models downloaded and verified.")