mirror of
https://github.com/ashim-hq/ashim
synced 2026-04-21 13:37:52 +00:00
perf: parallelize model downloads and switch to registry cache
- Parallelize all 14 model downloads using ThreadPoolExecutor (6 workers). Downloads were sequential (~30 min) and are now concurrent (~5-10 min).
- Switch the Docker cache from type=gha to type=registry (GHCR). The GHA cache has a 10 GB limit, which caused blob eviction and corrupted builds; the registry cache has no size limit and persists across runner instances.
- Add pip download cache mounts to all pip install layers. This prevents re-downloading packages when layers rebuild.
This commit is contained in:
parent
2fd0c00564
commit
79c4ed6a35
4 changed files with 69 additions and 25 deletions
11
.github/workflows/ci.yml
vendored
11
.github/workflows/ci.yml
vendored
|
|
@ -99,6 +99,13 @@ jobs:
|
|||
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to GHCR (for registry cache)
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GHCR_TOKEN }}
|
||||
|
||||
- uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
|
|
@ -106,5 +113,5 @@ jobs:
|
|||
push: false
|
||||
tags: ashim:ci
|
||||
build-args: SKIP_MODEL_DOWNLOADS=true
|
||||
cache-from: type=gha,scope=unified
|
||||
cache-to: type=gha,mode=max,scope=unified
|
||||
cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:cache-linux-amd64
|
||||
cache-to: type=registry,ref=ghcr.io/${{ github.repository }}:cache-ci,mode=max
|
||||
|
|
|
|||
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
|
|
@ -121,8 +121,8 @@ jobs:
|
|||
platforms: ${{ matrix.platform }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
outputs: type=image,"name=ashimhq/ashim,ghcr.io/${{ github.repository }}",push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=gha,scope=${{ env.PLATFORM_PAIR }}
|
||||
cache-to: type=gha,mode=max,scope=${{ env.PLATFORM_PAIR }}
|
||||
cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:cache-${{ env.PLATFORM_PAIR }}
|
||||
cache-to: type=registry,ref=ghcr.io/${{ github.repository }}:cache-${{ env.PLATFORM_PAIR }},mode=max
|
||||
|
||||
- name: Export digest
|
||||
run: |
|
||||
|
|
|
|||
|
|
@ -164,7 +164,8 @@ RUN apt-get -o Acquire::Retries=3 update && apt-get install -y --no-install-reco
|
|||
COPY --from=caire-builder /tmp/caire /usr/local/bin/caire
|
||||
|
||||
# Python venv - Layer 1: Base packages (rarely change, ~3 GB)
|
||||
RUN python3 -m venv /opt/venv && \
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
python3 -m venv /opt/venv && \
|
||||
/opt/venv/bin/pip install --upgrade pip && \
|
||||
/opt/venv/bin/pip install \
|
||||
Pillow==11.1.0 \
|
||||
|
|
@ -172,14 +173,16 @@ RUN python3 -m venv /opt/venv && \
|
|||
opencv-python-headless==4.10.0.84
|
||||
|
||||
# Platform-conditional ONNX runtime
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then \
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
if [ "$TARGETARCH" = "amd64" ]; then \
|
||||
/opt/venv/bin/pip install onnxruntime-gpu==1.20.1 \
|
||||
; else \
|
||||
/opt/venv/bin/pip install onnxruntime==1.20.1 \
|
||||
; fi
|
||||
|
||||
# Python venv - Layer 2: Tool packages (change occasionally, ~2 GB)
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then \
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
if [ "$TARGETARCH" = "amd64" ]; then \
|
||||
/opt/venv/bin/pip install rembg==2.0.62 && \
|
||||
/opt/venv/bin/pip install realesrgan==0.3.0 \
|
||||
--extra-index-url https://download.pytorch.org/whl/cu126 && \
|
||||
|
|
@ -193,17 +196,20 @@ RUN if [ "$TARGETARCH" = "amd64" ]; then \
|
|||
; fi
|
||||
|
||||
# mediapipe 0.10.21 only has amd64 wheels; arm64 maxes out at 0.10.18
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then \
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
if [ "$TARGETARCH" = "amd64" ]; then \
|
||||
/opt/venv/bin/pip install mediapipe==0.10.21 \
|
||||
; else \
|
||||
/opt/venv/bin/pip install mediapipe==0.10.18 \
|
||||
; fi
|
||||
|
||||
# CodeFormer face enhancement (install with --no-deps to avoid numpy 2.x conflict)
|
||||
RUN /opt/venv/bin/pip install --no-deps codeformer-pip==0.0.4 lpips
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
/opt/venv/bin/pip install --no-deps codeformer-pip==0.0.4 lpips
|
||||
|
||||
# Re-pin numpy to 1.26.4 in case any transitive dep upgraded it
|
||||
RUN /opt/venv/bin/pip install numpy==1.26.4
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
/opt/venv/bin/pip install numpy==1.26.4
|
||||
|
||||
# Pin rembg model storage to a fixed path so models downloaded at build time
|
||||
# (as root) are found at runtime (as the non-root ashim user, home=/app).
|
||||
|
|
|
|||
|
|
@ -638,21 +638,52 @@ def smoke_test():
|
|||
|
||||
|
||||
def main():
|
||||
print("Pre-downloading all ML models...\n")
|
||||
download_lama_model()
|
||||
download_rembg_models()
|
||||
download_realesrgan_model()
|
||||
download_gfpgan_model()
|
||||
download_codeformer_model()
|
||||
download_ddcolor_model()
|
||||
download_codeformer_onnx_model()
|
||||
download_paddleocr_models()
|
||||
download_paddleocr_vl_model()
|
||||
download_scunet_model()
|
||||
download_nafnet_model()
|
||||
download_facexlib_models()
|
||||
download_opencv_colorize_models()
|
||||
download_mediapipe_task_models()
|
||||
import concurrent.futures
|
||||
import threading
|
||||
|
||||
print("Pre-downloading all ML models (parallel)...\n")
|
||||
print_lock = threading.Lock()
|
||||
|
||||
# All download functions are independent (separate dirs, separate CDNs).
|
||||
# Run them in parallel to cut download time from ~30 min to ~5-10 min.
|
||||
download_fns = [
|
||||
download_lama_model,
|
||||
download_rembg_models,
|
||||
download_realesrgan_model,
|
||||
download_gfpgan_model,
|
||||
download_codeformer_model,
|
||||
download_ddcolor_model,
|
||||
download_codeformer_onnx_model,
|
||||
download_paddleocr_models,
|
||||
download_paddleocr_vl_model,
|
||||
download_scunet_model,
|
||||
download_nafnet_model,
|
||||
download_facexlib_models,
|
||||
download_opencv_colorize_models,
|
||||
download_mediapipe_task_models,
|
||||
]
|
||||
|
||||
# 6 workers balances parallelism with CDN rate limits
|
||||
errors = []
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=6) as pool:
|
||||
future_to_name = {
|
||||
pool.submit(fn): fn.__name__ for fn in download_fns
|
||||
}
|
||||
for future in concurrent.futures.as_completed(future_to_name):
|
||||
name = future_to_name[future]
|
||||
try:
|
||||
future.result()
|
||||
except Exception as e:
|
||||
errors.append((name, e))
|
||||
print(f"\n*** {name} FAILED: {e}\n")
|
||||
|
||||
if errors:
|
||||
print(f"\n{len(errors)} download(s) failed:")
|
||||
for name, e in errors:
|
||||
print(f" {name}: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
print("\nAll downloads complete. Running verification...\n")
|
||||
verify_mediapipe()
|
||||
smoke_test()
|
||||
print("All models downloaded and verified.")
|
||||
|
|
|
|||
Loading…
Reference in a new issue