feat: unified Docker image with GPU auto-detection (#37)

Merge CPU, CUDA, and lite Docker images into a single unified image.
One tag (latest) works on all platforms: amd64 (NVIDIA CUDA) and arm64 (CPU).
GPU auto-detected at runtime. All ML models and packages baked in.

Key changes:
- Platform-conditional Dockerfile (nvidia/cuda on amd64, node on arm64)
- tini as PID 1 for proper signal handling
- Fix FILES_STORAGE_PATH data loss bug
- Fix RealESRGAN upscaler (was broken, always fell back to Lanczos)
- Fix PaddleOCR language codes and stdout corruption
- Simplified CI/CD (single build, single tag)
- Expanded model pre-download with verification
- Shutdown timeout, improved health endpoint
- Remove unused lama-cleaner
This commit is contained in:
stirling-image 2026-04-10 13:21:06 +08:00 committed by GitHub
parent 7bc979f677
commit b0083e2b08
15 changed files with 374 additions and 310 deletions

View file

@ -80,20 +80,8 @@ jobs:
- run: pnpm build
docker:
name: Docker Build Test (${{ matrix.tag }})
name: Docker Build Test
runs-on: ubuntu-latest
strategy:
matrix:
include:
- tag: full
variant: full
gpu: "false"
- tag: lite
variant: lite
gpu: "false"
- tag: cuda
variant: full
gpu: "true"
steps:
- uses: actions/checkout@v4
@ -104,9 +92,6 @@ jobs:
context: .
file: docker/Dockerfile
push: false
build-args: |
VARIANT=${{ matrix.variant }}
GPU=${{ matrix.gpu }}
tags: stirling-image:ci-${{ matrix.tag }}
cache-from: type=gha,scope=${{ matrix.tag }}
cache-to: type=gha,mode=max,scope=${{ matrix.tag }}
tags: stirling-image:ci
cache-from: type=gha,scope=unified
cache-to: type=gha,mode=max,scope=unified

View file

@ -49,28 +49,10 @@ jobs:
fi
docker:
name: Docker (${{ matrix.tag }})
name: Docker Build & Push
needs: release
if: needs.release.outputs.new_version != ''
runs-on: ubuntu-latest
strategy:
matrix:
include:
- tag: full
variant: full
gpu: "false"
suffix: ""
platforms: "linux/amd64,linux/arm64"
- tag: lite
variant: lite
gpu: "false"
suffix: "-lite"
platforms: "linux/amd64,linux/arm64"
- tag: cuda
variant: full
gpu: "true"
suffix: "-cuda"
platforms: "linux/amd64"
steps:
- name: Checkout release tag
uses: actions/checkout@v4
@ -104,10 +86,10 @@ jobs:
stirlingimage/stirling-image
ghcr.io/${{ github.repository }}
tags: |
type=semver,pattern={{version}}${{ matrix.suffix }},value=v${{ needs.release.outputs.new_version }}
type=semver,pattern={{major}}.{{minor}}${{ matrix.suffix }},value=v${{ needs.release.outputs.new_version }}
type=semver,pattern={{major}}${{ matrix.suffix }},value=v${{ needs.release.outputs.new_version }}
type=raw,value=${{ matrix.tag == 'full' && 'latest' || matrix.tag }}
type=semver,pattern={{version}},value=v${{ needs.release.outputs.new_version }}
type=semver,pattern={{major}}.{{minor}},value=v${{ needs.release.outputs.new_version }}
type=semver,pattern={{major}},value=v${{ needs.release.outputs.new_version }}
type=raw,value=latest
- name: Build and push
uses: docker/build-push-action@v6
@ -115,11 +97,8 @@ jobs:
context: .
file: docker/Dockerfile
push: true
build-args: |
VARIANT=${{ matrix.variant }}
GPU=${{ matrix.gpu }}
platforms: ${{ matrix.platforms }}
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha,scope=${{ matrix.tag }}
cache-to: type=gha,mode=max,scope=${{ matrix.tag }}
cache-from: type=gha,scope=unified
cache-to: type=gha,mode=max,scope=unified

View file

@ -23,6 +23,14 @@ import { teamsRoutes } from "./routes/teams.js";
import { registerToolRoutes } from "./routes/tools/index.js";
import { userFileRoutes } from "./routes/user-files.js";
// Warn about deprecated STIRLING_VARIANT env var
if (process.env.STIRLING_VARIANT) {
console.warn(
`WARNING: STIRLING_VARIANT="${process.env.STIRLING_VARIANT}" is set but ignored. ` +
"There is now a single unified image with all features. Remove STIRLING_VARIANT from your environment.",
);
}
// Run before anything else
runMigrations();
console.log("Database initialized");
@ -108,12 +116,23 @@ await teamsRoutes(app);
// API docs (Scalar)
await docsRoutes(app);
// Public health check (minimal - no internal details)
app.get("/api/v1/health", async () => ({
status: "healthy",
version: APP_VERSION,
variant: process.env.STIRLING_VARIANT === "lite" ? "lite" : "full",
}));
// Public health check (checks core dependencies).
// Minimal by design: it reports only overall status and version — no internal
// details — and is consumed by the Docker HEALTHCHECK, so it must remain
// unauthenticated and cheap.
app.get("/api/v1/health", async (_request, reply) => {
  let dbOk = false;
  try {
    // Liveness probe: a single-row read against the settings table.
    // NOTE(review): assumes `db` is a synchronous Drizzle client
    // (better-sqlite3 style) where `.get()` executes immediately — confirm.
    db.select().from(schema.settings).limit(1).get();
    dbOk = true;
  } catch {
    /* db unreachable */
  }
  // 503 on DB failure lets orchestrators (Docker healthcheck, load
  // balancers) treat a broken database as an unhealthy container.
  const status = dbOk ? "healthy" : "unhealthy";
  const code = dbOk ? 200 : 503;
  return reply.code(code).send({
    status,
    version: APP_VERSION,
  });
});
// Admin health check (full diagnostics)
app.get("/api/v1/admin/health", async (request, reply) => {
@ -161,12 +180,19 @@ try {
}
// Graceful shutdown
const SHUTDOWN_TIMEOUT_MS = 8000;
let shuttingDown = false;
async function shutdown(signal: string) {
if (shuttingDown) return;
shuttingDown = true;
console.log(`\n${signal} received, shutting down gracefully...`);
const forceExit = setTimeout(() => {
console.error("Shutdown timed out, forcing exit");
process.exit(1);
}, SHUTDOWN_TIMEOUT_MS);
forceExit.unref();
cleanupCron.stop();
try {
@ -199,6 +225,7 @@ async function shutdown(signal: string) {
console.error("Error closing database:", err);
}
clearTimeout(forceExit);
process.exit(0);
}

View file

@ -4,12 +4,8 @@ The `@stirling-image/ai` package wraps Python ML models in TypeScript functions.
All model weights are bundled in the Docker image during the build. No downloads happen at runtime.
::: warning Lite image
AI tools are not available in the `:lite` Docker image. The API returns `501 Not Available` for these endpoints when running the lite variant. Use `:latest` for AI features. See [Docker Tags](/guide/docker-tags) for details.
:::
::: tip GPU acceleration
The `:cuda` Docker image includes GPU-accelerated versions of the ML libraries. Background removal, upscaling, and OCR all benefit from NVIDIA GPU acceleration. The image auto-detects your GPU and falls back to CPU if none is available. See [Docker Tags](/guide/docker-tags) for setup.
The Docker image includes CUDA-accelerated ML libraries on amd64. Add `--gpus all` to your Docker run command to enable GPU acceleration. The image auto-detects your GPU and falls back to CPU if none is available.
:::
## Background removal
@ -73,7 +69,7 @@ Returns the blurred image along with metadata about each detected face region (b
Removes objects from images by filling in the area with generated content that matches the surroundings.
**Model:** [LaMa](https://github.com/advimman/lama) (Large Mask Inpainting)
**Model:** OpenCV TELEA algorithm
Takes an image and a mask (white = area to erase, black = keep). Returns the inpainted image.

View file

@ -34,7 +34,7 @@ Supported operations:
- **Upscaling** -- RealESRGAN
- **OCR** -- PaddleOCR
- **Face detection/blurring** -- MediaPipe
- **Object erasing (inpainting)** -- LaMa Cleaner
- **Object erasing (inpainting)** -- OpenCV
Python scripts live in `packages/ai/python/`. The Docker image pre-downloads all model weights during the build so the container works offline.

View file

@ -1,16 +1,8 @@
# Deployment
Stirling Image ships as a single Docker container. The image supports **linux/amd64** and **linux/arm64**, so it runs natively on Intel/AMD servers, Apple Silicon Macs, and ARM devices like the Raspberry Pi 4/5.
Stirling Image ships as a single Docker container. The image supports **linux/amd64** (with NVIDIA CUDA) and **linux/arm64** (CPU), so it runs natively on Intel/AMD servers, Apple Silicon Macs, and ARM devices like the Raspberry Pi 4/5.
Three variants are available:
| Variant | Tag | Size | What's included |
|---------|-----|------|-----------------|
| Full | `:latest` | ~11 GB | All tools + AI/ML (background removal, upscaling, OCR, face blur, object eraser) |
| Lite | `:lite` | ~1.5 GB | All image processing tools, no AI/ML |
| CUDA | `:cuda` | ~14 GB | Full + GPU-accelerated AI (NVIDIA only, amd64) |
See [Docker Tags](./docker-tags) for the full comparison, Docker Compose examples, and version pinning.
See [Docker Image](./docker-tags) for GPU setup, Docker Compose examples, and version pinning.
## Docker Compose (recommended)
@ -64,14 +56,14 @@ Everything runs from a single process. The Fastify server handles API requests a
- RealESRGAN (upscaling)
- PaddleOCR (text recognition)
- MediaPipe (face detection)
- LaMa Cleaner (inpainting/object removal)
- OpenCV (inpainting/object removal)
- onnxruntime, opencv-python, Pillow, numpy
Model weights are downloaded at build time, so the container works fully offline. The lite image (`:lite`) skips all Python packages and model downloads.
Model weights are downloaded at build time, so the container works fully offline.
### Architecture notes
All core image tools (resize, crop, compress, convert, watermark, etc.) work on both amd64 and arm64. Some ML packages (PaddleOCR, MediaPipe, LaMa Cleaner) have limited arm64 support and may be unavailable on ARM systems. The container logs a warning for any package that could not be installed and falls back gracefully — Tesseract handles OCR and Lanczos handles upscaling when the ML alternatives are missing.
All tools work on both amd64 and arm64. AI tools (background removal, upscaling, OCR, face detection) use CUDA-accelerated packages on amd64 and CPU packages on arm64. GPU acceleration is auto-detected at runtime when `--gpus all` is passed.
## Volumes
@ -120,7 +112,7 @@ Set `client_max_body_size` to match your `MAX_UPLOAD_SIZE_MB` value.
The GitHub repository has two workflows:
- **release.yml** -- On release, builds multi-arch Docker images (amd64 + arm64) for both the full and lite variants, and pushes to Docker Hub (`stirlingimage/stirling-image`) and GitHub Container Registry (`ghcr.io/stirling-image/stirling-image`).
- **release.yml** -- On release, builds a multi-arch Docker image (amd64 + arm64), and pushes to Docker Hub (`stirlingimage/stirling-image`) and GitHub Container Registry (`ghcr.io/stirling-image/stirling-image`).
- **deploy-docs.yml** -- Builds this documentation site and deploys it to GitHub Pages.
Both run automatically. No manual steps needed after merging to `main`.

View file

@ -1,53 +1,28 @@
# Docker Image Tags
# Docker Image
Stirling Image ships three Docker image variants to fit different use cases.
Stirling Image ships as a single Docker image that works on all platforms.
## Full (default)
## Quick start
```bash
docker pull stirlingimage/stirling-image:latest
docker run -d -p 1349:1349 -v stirling-data:/data stirlingimage/stirling-image:latest
```
Includes all tools: image processing, AI-powered background removal, upscaling, face blurring, object erasing, and OCR. Size is ~11 GB due to bundled ML models.
The app is available at `http://localhost:1349`.
## Lite
## GPU acceleration
The image includes CUDA support on amd64. If you have an NVIDIA GPU with the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed, add `--gpus all`:
```bash
docker pull stirlingimage/stirling-image:lite
docker run -d --gpus all -p 1349:1349 -v stirling-data:/data stirlingimage/stirling-image:latest
```
Includes all image processing tools (resize, crop, rotate, convert, compress, watermark, collage, and 20+ more) but excludes AI/ML tools. Size is ~1-2 GB.
Use this if you:
- Only need standard image processing (no AI features)
- Are running on constrained hardware (Raspberry Pi, small VPS)
- Want faster pulls and smaller disk footprint
### Tools excluded from lite
| Tool | What it does |
|------|-------------|
| Remove Background | AI-powered background removal |
| Upscale | AI super-resolution upscaling |
| Blur Faces | AI face detection and blurring |
| Erase Object | AI inpainting to remove objects |
| OCR | Optical character recognition |
All other tools (27+) work identically in both variants.
## CUDA (GPU acceleration)
```bash
docker pull stirlingimage/stirling-image:cuda
```
Same tools as the full image, but built with GPU-accelerated Python packages (onnxruntime-gpu, PyTorch CUDA, PaddlePaddle GPU). The image auto-detects your NVIDIA GPU at runtime and falls back to CPU if none is found.
Requires [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) on the host. Linux amd64 only.
The image auto-detects your GPU at runtime. Without `--gpus all`, it runs on CPU. Same image either way.
### Benchmarks
Tested on an NVIDIA RTX 4070 (12 GB VRAM) with a 572x1024 JPEG portrait. Both images ran on the same machine. "Warm" means the model is already loaded in memory (second request onward).
Tested on an NVIDIA RTX 4070 (12 GB VRAM) with a 572x1024 JPEG portrait.
#### Warm performance
@ -68,10 +43,6 @@ Tested on an NVIDIA RTX 4070 (12 GB VRAM) with a 572x1024 JPEG portrait. Both im
| Upscale 2x | 3,957ms | 2,318ms | 1.7x |
| OCR (PaddleOCR) | 1,469ms | 1,090ms | 1.3x |
Cold start includes loading the model into memory. GPU cold starts are faster because CUDA parallelizes the model loading.
Larger images show bigger speedups, especially for upscaling. Non-AI tools (resize, crop, convert, etc.) are unaffected since they use Sharp (CPU-based).
### GPU health check
After the first AI request, the admin health endpoint reports GPU status:
@ -83,8 +54,6 @@ GET /api/v1/admin/health
## Docker Compose
### Full
```yaml
services:
stirling-image:
@ -94,35 +63,24 @@ services:
volumes:
- stirling-data:/data
- stirling-workspace:/tmp/workspace
restart: unless-stopped
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
volumes:
stirling-data:
stirling-workspace:
```
### Lite
For GPU acceleration via Docker Compose, add the deploy section:
```yaml
services:
stirling-image:
image: stirlingimage/stirling-image:lite
ports:
- "1349:1349"
volumes:
- stirling-data:/data
- stirling-workspace:/tmp/workspace
volumes:
stirling-data:
stirling-workspace:
```
### CUDA
```yaml
services:
stirling-image:
image: stirlingimage/stirling-image:cuda
image: stirlingimage/stirling-image:latest
ports:
- "1349:1349"
volumes:
@ -135,35 +93,34 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
restart: unless-stopped
volumes:
stirling-data:
stirling-workspace:
```
## Switching from lite to full
To upgrade from lite to full and unlock AI tools:
1. Stop your container
2. Pull the full image: `docker pull stirlingimage/stirling-image:latest`
3. Update your compose file or run command to use `:latest` instead of `:lite`
4. Start the container
Your data and settings are preserved in the volumes.
## Version pinning
Both variants support semver tags for pinning:
| Tag | Description |
|-----|------------|
| `latest` | Latest full release |
| `lite` | Latest lite release |
| `cuda` | Latest full release with GPU support |
| `1.6.0` | Exact full version |
| `1.6.0-lite` | Exact lite version |
| `1.6.0-cuda` | Exact CUDA version |
| `1.6` | Latest patch in 1.6.x (full) |
| `1.6-lite` | Latest patch in 1.6.x (lite) |
| `1.6-cuda` | Latest patch in 1.6.x (CUDA) |
| `latest` | Latest release |
| `1.11.0` | Exact version |
| `1.11` | Latest patch in 1.11.x |
| `1` | Latest minor in 1.x |
## Platforms
| Architecture | GPU support | Notes |
|---|---|---|
| linux/amd64 | NVIDIA CUDA | Full GPU acceleration for AI tools |
| linux/arm64 | CPU only | Raspberry Pi 4/5, Apple Silicon via Docker Desktop |
## Migration from previous tags
If you were using `:lite` or `:cuda` tags, switch to `:latest`:
- **From `:lite`**: Pull `:latest`. You now have all AI tools included.
- **From `:cuda`**: Pull `:latest` and keep `--gpus all`. Same GPU support, unified image.
Your data and settings are preserved in the volumes.

View file

@ -1,12 +1,9 @@
# syntax=docker/dockerfile:1
# ============================================
# Stirling Image - Production Dockerfile
# Multi-stage build for single-container deployment
# Stirling Image - Unified Production Dockerfile
# Single image: GPU auto-detected on amd64, CPU on arm64
# ============================================
ARG VARIANT=full
ARG GPU=false
# ============================================
# Stage 1: Build the frontend (Vite + React)
# ============================================
@ -39,25 +36,22 @@ RUN --mount=type=cache,id=turbo-cache,target=/app/.turbo \
pnpm --filter @stirling-image/web build
# ============================================
# Stage 2: Base image selection
# Stage 2: Platform-specific base images
# ============================================
FROM node:22-bookworm AS base-cpu
FROM nvidia/cuda:12.6.3-runtime-ubuntu24.04 AS base-gpu
# Select base: GPU=false -> base-cpu, GPU=true -> base-gpu
FROM base-cpu AS production-base-false
FROM base-gpu AS production-base-true
FROM node:22-bookworm AS base-linux-arm64
FROM nvidia/cuda:12.6.3-runtime-ubuntu24.04 AS base-linux-amd64
# ============================================
# Stage 3: Production runtime
# ============================================
FROM production-base-${GPU} AS production
ARG TARGETOS
ARG TARGETARCH
FROM base-${TARGETOS}-${TARGETARCH} AS production
ARG VARIANT
ARG GPU
ARG TARGETARCH
# Install Node.js when using CUDA base (node:22-bookworm already has it)
RUN if [ "$GPU" = "true" ]; then \
# Install Node.js on amd64 (CUDA base has no Node; arm64 base already has it)
RUN if [ "$TARGETARCH" = "amd64" ]; then \
apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates gnupg && \
mkdir -p /etc/apt/keyrings && \
@ -71,67 +65,65 @@ RUN if [ "$GPU" = "true" ]; then \
RUN corepack enable && corepack prepare pnpm@9.15.4 --activate
# System dependencies shared by all variants
# System dependencies (all platforms)
RUN apt-get update && apt-get install -y --no-install-recommends \
tini \
imagemagick \
libraw-dev \
potrace \
curl \
gosu \
libheif-examples \
python3 python3-pip python3-venv python3-dev \
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra tesseract-ocr-spa \
build-essential \
libgl1 libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Python/ML system dependencies (full variant only)
RUN if [ "$VARIANT" = "full" ]; then \
apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip python3-venv python3-dev \
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra tesseract-ocr-spa \
build-essential \
libgl1 libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/* \
# Python venv - Layer 1: Base packages (rarely change, ~3 GB)
RUN python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --upgrade pip && \
/opt/venv/bin/pip install \
Pillow==11.1.0 \
numpy==1.26.4 \
opencv-python-headless==4.10.0.84
# Platform-conditional ONNX runtime
RUN if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install onnxruntime-gpu==1.20.1 \
; else \
/opt/venv/bin/pip install onnxruntime==1.20.1 \
; fi
# Python venv + ML packages + model weights (full variant only)
COPY packages/ai/python/requirements.txt /tmp/requirements.txt
COPY packages/ai/python/requirements-gpu.txt /tmp/requirements-gpu.txt
RUN if [ "$VARIANT" = "full" ]; then \
python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --upgrade pip && \
if [ "$GPU" = "true" ]; then \
/opt/venv/bin/pip install \
Pillow numpy opencv-python-headless onnxruntime-gpu && \
(/opt/venv/bin/pip install rembg || echo "WARNING: rembg not installed") && \
(/opt/venv/bin/pip install realesrgan \
--extra-index-url https://download.pytorch.org/whl/cu126 \
|| echo "WARNING: realesrgan not installed") && \
(/opt/venv/bin/pip install paddlepaddle-gpu paddleocr || echo "WARNING: PaddleOCR not installed") && \
(/opt/venv/bin/pip install mediapipe || echo "WARNING: mediapipe not installed") && \
(/opt/venv/bin/pip install lama-cleaner || echo "WARNING: lama-cleaner not installed") && \
(/opt/venv/bin/pip install seam-carving || echo "WARNING: seam-carving not installed") \
; else \
/opt/venv/bin/pip install \
Pillow numpy opencv-python-headless onnxruntime && \
(/opt/venv/bin/pip install "rembg[cpu]" || echo "WARNING: rembg not installed") && \
(/opt/venv/bin/pip install realesrgan || echo "WARNING: realesrgan not installed") && \
(/opt/venv/bin/pip install paddlepaddle paddleocr || echo "WARNING: PaddleOCR not installed") && \
(/opt/venv/bin/pip install mediapipe || echo "WARNING: mediapipe not installed") && \
(/opt/venv/bin/pip install lama-cleaner || echo "WARNING: lama-cleaner not installed") && \
(/opt/venv/bin/pip install seam-carving || echo "WARNING: seam-carving not installed") \
; fi \
; fi && rm -f /tmp/requirements.txt /tmp/requirements-gpu.txt
# Python venv - Layer 2: Tool packages (change occasionally, ~2 GB)
RUN if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install rembg==2.0.62 && \
/opt/venv/bin/pip install realesrgan==0.3.0 \
--extra-index-url https://download.pytorch.org/whl/cu126 && \
/opt/venv/bin/pip install paddlepaddle-gpu==3.0.0 \
--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ && \
/opt/venv/bin/pip install paddleocr==2.9.1 \
; else \
/opt/venv/bin/pip install "rembg[cpu]==2.0.62" && \
/opt/venv/bin/pip install realesrgan==0.3.0 && \
/opt/venv/bin/pip install paddlepaddle==3.0.0 paddleocr==2.9.1 \
; fi
# mediapipe 0.10.21 only has amd64 wheels; arm64 maxes out at 0.10.18
RUN if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install mediapipe==0.10.21 \
; else \
/opt/venv/bin/pip install mediapipe==0.10.18 \
; fi
RUN /opt/venv/bin/pip install seam-carving==1.1.0
# Pre-download and verify all ML models
# Note: on amd64, paddlepaddle-gpu can't import without the CUDA driver (only
# available at runtime). The download script gracefully skips PaddleOCR model
# pre-download in this case; models download on first use at runtime instead.
COPY docker/download_models.py /tmp/download_models.py
RUN if [ "$VARIANT" = "full" ]; then \
/opt/venv/bin/python3 /tmp/download_models.py && \
/opt/venv/bin/python3 -c "\
try: \
from paddleocr import PaddleOCR; \
print('Downloading PaddleOCR models...'); \
ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False); \
print('PaddleOCR models ready'); \
except: print('PaddleOCR model pre-download skipped') \
" 2>/dev/null || echo "WARNING: Could not pre-download PaddleOCR models" \
; fi && rm -f /tmp/download_models.py
RUN /opt/venv/bin/python3 /tmp/download_models.py && rm -f /tmp/download_models.py
WORKDIR /app
@ -151,10 +143,8 @@ RUN --mount=type=cache,id=pnpm-store,target=/root/.local/share/pnpm/store/v3 \
pnpm install --frozen-lockfile --prod
# Remove build tools no longer needed in production
RUN if [ "$VARIANT" = "full" ]; then \
apt-get purge -y --auto-remove build-essential python3-dev && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN apt-get purge -y --auto-remove build-essential python3-dev && \
rm -rf /var/lib/apt/lists/*
# Copy source code for API (tsx runs TS directly - no build step needed)
COPY apps/api/src ./apps/api/src
@ -170,9 +160,9 @@ COPY packages/ai/python ./packages/ai/python
COPY --from=builder /app/apps/web/dist ./apps/web/dist
# Create required directories
RUN mkdir -p /data /tmp/workspace
RUN mkdir -p /data /data/files /tmp/workspace
# Environment defaults (matching PRD Section 16.1)
# Environment defaults
ENV PORT=1349 \
NODE_ENV=production \
AUTH_ENABLED=true \
@ -181,6 +171,7 @@ ENV PORT=1349 \
STORAGE_MODE=local \
DB_PATH=/data/stirling.db \
WORKSPACE_PATH=/tmp/workspace \
FILES_STORAGE_PATH=/data/files \
PYTHON_VENV_PATH=/opt/venv \
DEFAULT_THEME=light \
DEFAULT_LOCALE=en \
@ -191,13 +182,20 @@ ENV PORT=1349 \
MAX_BATCH_SIZE=200 \
CONCURRENT_JOBS=3 \
MAX_MEGAPIXELS=100 \
RATE_LIMIT_PER_MIN=100 \
STIRLING_VARIANT=${VARIANT}
RATE_LIMIT_PER_MIN=100
# NVIDIA Container Toolkit env vars (harmless on non-GPU systems)
ENV NVIDIA_VISIBLE_DEVICES=all \
NVIDIA_DRIVER_CAPABILITIES=compute,utility
# Suppress noisy ML library output in docker logs
ENV PYTHONWARNINGS=ignore \
TF_CPP_MIN_LOG_LEVEL=3 \
PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True
# Create non-root user for runtime
RUN groupadd -r stirling && useradd -r -g stirling -d /app -s /sbin/nologin stirling
RUN chown -R stirling:stirling /app /data /tmp/workspace && \
([ -d /opt/venv ] && chown -R stirling:stirling /opt/venv || true)
RUN chown -R stirling:stirling /app /data /tmp/workspace /opt/venv
# Entrypoint fixes volume permissions then drops to stirling via gosu
COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh
@ -205,8 +203,9 @@ RUN chmod +x /usr/local/bin/entrypoint.sh
EXPOSE 1349
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
CMD curl -f http://localhost:1349/api/v1/health || exit 1
ENTRYPOINT ["entrypoint.sh"]
# tini as PID 1 for zombie reaping + signal forwarding
ENTRYPOINT ["tini", "--", "entrypoint.sh"]
CMD ["npx", "tsx", "apps/api/src/index.ts"]

View file

@ -1,17 +0,0 @@
# GPU override - use with:
# docker compose -f docker/docker-compose.yml -f docker/docker-compose.gpu.yml up
services:
stirling-image:
build:
context: ..
dockerfile: docker/Dockerfile
args:
GPU: "true"
image: stirlingimage/stirling-image:cuda
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]

View file

@ -1,32 +1,25 @@
name: stirling-image
services:
stirling-image:
build:
context: ..
dockerfile: docker/Dockerfile
image: stirlingimage/stirling-image:latest
container_name: stirling-image
ports:
- "1349:1349"
volumes:
- stirling-data:/data
- stirling-workspace:/tmp/workspace
environment:
- AUTH_ENABLED=true
- DEFAULT_USERNAME=admin
- DEFAULT_PASSWORD=admin
- STORAGE_MODE=local
- FILE_MAX_AGE_HOURS=24
- CLEANUP_INTERVAL_MINUTES=30
- MAX_UPLOAD_SIZE_MB=100
- MAX_BATCH_SIZE=200
- CONCURRENT_JOBS=3
- MAX_MEGAPIXELS=100
- RATE_LIMIT_PER_MIN=100
- DEFAULT_THEME=light
- DEFAULT_LOCALE=en
- APP_NAME=Stirling Image
volumes:
- stirling-data:/data
- stirling-workspace:/tmp/workspace
restart: unless-stopped
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
volumes:
stirling-data:

View file

@ -1,7 +1,26 @@
"""Pre-download all rembg models offered in the UI."""
import sys
"""Pre-download and verify all ML models for the Docker image.
MODELS = [
This script runs at Docker build time. Any failure exits non-zero,
failing the build. No silent fallbacks.
"""
import os
import sys
import urllib.request
# Force CPU mode during build - no GPU driver available at build time.
# Must be set before any ML library import.
os.environ["PADDLE_DEVICE"] = "cpu"
os.environ["FLAGS_use_cuda"] = "0"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
REALESRGAN_MODEL_DIR = "/opt/models/realesrgan"
REALESRGAN_MODEL_URL = (
"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"
)
REALESRGAN_MODEL_PATH = os.path.join(REALESRGAN_MODEL_DIR, "RealESRGAN_x4plus.pth")
REALESRGAN_MIN_SIZE = 60_000_000 # ~67 MB
REMBG_MODELS = [
"u2net",
"isnet-general-use",
"bria-rmbg",
@ -10,18 +29,116 @@ MODELS = [
"birefnet-general",
]
try:
from rembg import new_session
except ImportError:
print("WARNING: rembg not installed, skipping model pre-download")
sys.exit(0)
# PaddleOCR language codes (not ISO). German/French/Spanish use "latin" model.
# Valid keys: ch, en, korean, japan, chinese_cht, ta, te, ka, latin, arabic, cyrillic, devanagari
PADDLEOCR_LANGUAGES = ["en", "ch", "japan", "korean", "latin"]
for model in MODELS:
print(f"Downloading {model}...")
try:
def download_rembg_models():
"""Download all rembg ONNX models."""
print("=== Downloading rembg models ===")
from rembg import new_session
for model in REMBG_MODELS:
print(f" Downloading {model}...")
new_session(model)
print(f" {model} ready")
except Exception as e:
print(f" WARNING: {model} failed: {e}")
print(f"All {len(REMBG_MODELS)} rembg models downloaded.\n")
print("Model pre-download complete")
def download_realesrgan_model():
    """Download the RealESRGAN_x4plus.pth pretrained weights.

    Fetches the upscaler weights from the official Real-ESRGAN GitHub release
    into REALESRGAN_MODEL_DIR and verifies the file size so a truncated or
    failed download fails the Docker build instead of surfacing at runtime.

    Raises:
        RuntimeError: if the downloaded file is smaller than
            REALESRGAN_MIN_SIZE (i.e. the download was truncated).
    """
    print("=== Downloading RealESRGAN model ===")
    os.makedirs(REALESRGAN_MODEL_DIR, exist_ok=True)
    print(f"  Downloading from {REALESRGAN_MODEL_URL}...")
    urllib.request.urlretrieve(REALESRGAN_MODEL_URL, REALESRGAN_MODEL_PATH)
    size = os.path.getsize(REALESRGAN_MODEL_PATH)
    # Explicit raise rather than `assert`: asserts are stripped under
    # `python -O`, which would silently disable this integrity check even
    # though the module docstring promises "any failure exits non-zero".
    if size <= REALESRGAN_MIN_SIZE:
        raise RuntimeError(
            f"RealESRGAN model too small: {size} bytes (expected > {REALESRGAN_MIN_SIZE})"
        )
    print(f"  RealESRGAN_x4plus.pth downloaded ({size / 1_000_000:.1f} MB)\n")
def download_paddleocr_models():
    """Pre-download PaddleOCR models for all supported languages.

    On amd64 the image ships paddlepaddle-gpu, which cannot even be imported
    without a CUDA driver; drivers exist only at container runtime, not at
    Docker build time. In that case the pre-download is skipped and models are
    fetched lazily on first use. Any other ImportError is a real packaging
    problem and is re-raised to fail the build.
    """
    print("=== Downloading PaddleOCR models ===")
    try:
        from paddleocr import PaddleOCR
    except ImportError as e:
        if "libcuda" in str(e):
            # paddlepaddle-gpu can't import without the CUDA driver at build
            # time. Models will be downloaded on first use at runtime instead.
            # (Fixed: these two strings had stray `f` prefixes with no
            # placeholders — lint F541.)
            print("  Skipping PaddleOCR model pre-download (no CUDA driver at build time)")
            print("  Models will download on first use at runtime.\n")
            return
        raise
    for lang in PADDLEOCR_LANGUAGES:
        print(f"  Downloading models for lang={lang}...")
        # Instantiating PaddleOCR triggers the model download for that language.
        PaddleOCR(lang=lang, use_gpu=False, show_log=False)
        print(f"  {lang} ready")
    print(f"All {len(PADDLEOCR_LANGUAGES)} PaddleOCR languages downloaded.\n")
def verify_mediapipe():
    """Check that the MediaPipe face-detection models ship inside the wheel.

    Builds a FaceDetection solution for each supported model_selection value
    and closes it again; construction fails if the bundled model is missing.
    """
    print("=== Verifying MediaPipe models ===")
    import mediapipe as mp

    face_detection_cls = mp.solutions.face_detection.FaceDetection
    for selection, label in ((0, "short-range"), (1, "full-range")):
        print(f"  Verifying {label} model (selection={selection})...")
        face_detection_cls(
            model_selection=selection, min_detection_confidence=0.5
        ).close()
        print(f"  {label} model OK")
    print("MediaPipe models verified.\n")
def smoke_test():
    """Final verification that all ML libraries and models are loadable.

    GPU-dependent libraries (paddlepaddle-gpu, torch CUDA) cannot be imported
    at build time because the CUDA driver is only available at runtime. We
    verify CPU-only imports and check that model files exist on disk.

    Raises:
        ImportError: if any CPU-side ML package failed to install.
        RuntimeError: if the RealESRGAN weights are missing or truncated.
    """
    print("=== Running smoke test ===")
    # CPU-only imports that work on all platforms at build time. The imports
    # themselves are the test: a broken install raises ImportError here and
    # fails the build, so the "unused" names are intentional.
    from PIL import Image  # noqa: F401
    import cv2  # noqa: F401
    import numpy  # noqa: F401
    import seam_carving  # noqa: F401
    from rembg import new_session  # noqa: F401
    print("  CPU imports OK (Pillow, cv2, numpy, seam_carving, rembg)")
    # MediaPipe is CPU-only, should always import
    import mediapipe as mp  # noqa: F401
    print("  MediaPipe import OK")
    # RealESRGAN model file must exist and look complete. Explicit raises
    # instead of `assert` so the checks survive `python -O` (asserts are
    # stripped under optimization, which would defeat the build gate).
    if not os.path.exists(REALESRGAN_MODEL_PATH):
        raise RuntimeError(f"RealESRGAN model missing: {REALESRGAN_MODEL_PATH}")
    if os.path.getsize(REALESRGAN_MODEL_PATH) <= REALESRGAN_MIN_SIZE:
        raise RuntimeError("RealESRGAN model file is too small")
    print("  RealESRGAN model file verified")
    print("Smoke test passed.\n")
def main():
    """Run every download/verification step in order; any failure propagates
    and exits the build non-zero."""
    print("Pre-downloading all ML models...\n")
    steps = (
        download_rembg_models,
        download_realesrgan_model,
        download_paddleocr_models,
        verify_mediapipe,
        smoke_test,
    )
    for step in steps:
        step()
    print("All models downloaded and verified.")


if __name__ == "__main__":
    main()

View file

@ -33,23 +33,39 @@ def run_tesseract(input_path, language):
def run_paddleocr(input_path, language):
"""Run PaddleOCR."""
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
from paddleocr import PaddleOCR
from gpu import gpu_available
emit_progress(20, "Loading")
ocr = PaddleOCR(lang=language, use_gpu=gpu_available())
emit_progress(30, "Scanning")
result = ocr.ocr(input_path)
emit_progress(70, "Extracting text")
text = "\n".join(
[
line[1][0]
for res in result
if res
for line in res
if line and line[1]
]
)
# Redirect stdout to stderr so PaddleOCR download/init messages
# cannot contaminate our JSON result on stdout.
stdout_fd = os.dup(1)
os.dup2(2, 1)
try:
from paddleocr import PaddleOCR
from gpu import gpu_available
# Map API language codes to PaddleOCR codes
paddle_lang_map = {"en": "en", "de": "latin", "fr": "latin", "es": "latin", "zh": "ch", "ja": "japan", "ko": "korean"}
paddle_lang = paddle_lang_map.get(language, "en")
emit_progress(20, "Loading")
ocr = PaddleOCR(lang=paddle_lang, use_gpu=gpu_available(), show_log=False)
emit_progress(30, "Scanning")
result = ocr.ocr(input_path)
emit_progress(70, "Extracting text")
text = "\n".join(
[
line[1][0]
for res in result
if res
for line in res
if line and line[1]
]
)
finally:
# Restore stdout
os.dup2(stdout_fd, 1)
os.close(stdout_fd)
return text, "paddleocr"

View file

@ -1,6 +1,5 @@
rembg==2.0.62
realesrgan==0.3.0
lama-cleaner==1.2.5
paddleocr==2.9.1
paddlepaddle-gpu==3.0.0
mediapipe==0.10.21
@ -8,3 +7,4 @@ onnxruntime-gpu==1.20.1
numpy==1.26.4
Pillow==11.1.0
opencv-python-headless==4.10.0.84
seam-carving==1.1.0

View file

@ -1,6 +1,5 @@
rembg[cpu]==2.0.62
realesrgan==0.3.0
lama-cleaner==1.2.5
paddleocr==2.9.1
paddlepaddle==3.0.0
mediapipe==0.10.21

View file

@ -1,6 +1,7 @@
"""Image upscaling with Real-ESRGAN fallback to Lanczos."""
import sys
import json
import os
def emit_progress(percent, stage):
@ -8,6 +9,12 @@ def emit_progress(percent, stage):
print(json.dumps({"progress": percent, "stage": stage}), file=sys.stderr, flush=True)
REALESRGAN_MODEL_PATH = os.environ.get(
"REALESRGAN_MODEL_PATH",
"/opt/models/realesrgan/RealESRGAN_x4plus.pth",
)
def main():
input_path = sys.argv[1]
output_path = sys.argv[2]
@ -24,26 +31,40 @@ def main():
# Try Real-ESRGAN first
try:
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan import RealESRGANer
from gpu import gpu_available
import numpy as np
import torch
# Redirect stdout to stderr so basicsr/realesrgan init messages
# cannot contaminate our JSON result on stdout.
stdout_fd = os.dup(1)
os.dup2(2, 1)
try:
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan import RealESRGANer
from gpu import gpu_available
import numpy as np
import torch
finally:
# Restore stdout after imports
os.dup2(stdout_fd, 1)
os.close(stdout_fd)
if not os.path.exists(REALESRGAN_MODEL_PATH):
raise FileNotFoundError(f"RealESRGAN model not found: {REALESRGAN_MODEL_PATH}")
use_gpu = gpu_available()
device = torch.device("cuda" if use_gpu else "cpu")
# RealESRGAN_x4plus is a 4x model internally
model = RRDBNet(
num_in_ch=3,
num_out_ch=3,
num_feat=64,
num_block=23,
num_grow_ch=32,
scale=scale,
scale=4,
)
upsampler = RealESRGANer(
scale=scale,
model_path=None,
scale=4,
model_path=REALESRGAN_MODEL_PATH,
model=model,
half=use_gpu,
device=device,
@ -57,8 +78,8 @@ def main():
emit_progress(95, "Saving result")
result.save(output_path)
method = "realesrgan"
except (ImportError, Exception):
# Fallback to Lanczos upscaling
except (ImportError, FileNotFoundError, RuntimeError, OSError):
# RealESRGAN unavailable or failed - fall back to Lanczos
emit_progress(50, "Upscaling with Lanczos")
img_upscaled = img.resize(new_size, Image.LANCZOS)
emit_progress(95, "Saving result")