feat: unified Docker image with GPU auto-detection (#37)

Merge CPU, CUDA, and lite Docker images into a single unified image.
One tag (latest) works on all platforms: amd64 (NVIDIA CUDA) and arm64 (CPU).
GPU auto-detected at runtime. All ML models and packages baked in.

Key changes:
- Platform-conditional Dockerfile (nvidia/cuda on amd64, node on arm64)
- tini as PID 1 for proper signal handling
- Fix FILES_STORAGE_PATH data loss bug
- Fix RealESRGAN upscaler (was broken, always fell back to Lanczos)
- Fix PaddleOCR language codes and stdout corruption
- Simplified CI/CD (single build, single tag)
- Expanded model pre-download with verification
- Shutdown timeout, improved health endpoint
- Remove unused lama-cleaner
This commit is contained in:
stirling-image 2026-04-10 13:21:06 +08:00 committed by GitHub
parent 7bc979f677
commit b0083e2b08
15 changed files with 374 additions and 310 deletions

View file

@ -80,20 +80,8 @@ jobs:
- run: pnpm build
docker:
name: Docker Build Test (${{ matrix.tag }})
name: Docker Build Test
runs-on: ubuntu-latest
strategy:
matrix:
include:
- tag: full
variant: full
gpu: "false"
- tag: lite
variant: lite
gpu: "false"
- tag: cuda
variant: full
gpu: "true"
steps:
- uses: actions/checkout@v4
@ -104,9 +92,6 @@ jobs:
context: .
file: docker/Dockerfile
push: false
build-args: |
VARIANT=${{ matrix.variant }}
GPU=${{ matrix.gpu }}
tags: stirling-image:ci-${{ matrix.tag }}
cache-from: type=gha,scope=${{ matrix.tag }}
cache-to: type=gha,mode=max,scope=${{ matrix.tag }}
tags: stirling-image:ci
cache-from: type=gha,scope=unified
cache-to: type=gha,mode=max,scope=unified

View file

@ -49,28 +49,10 @@ jobs:
fi
docker:
name: Docker (${{ matrix.tag }})
name: Docker Build & Push
needs: release
if: needs.release.outputs.new_version != ''
runs-on: ubuntu-latest
strategy:
matrix:
include:
- tag: full
variant: full
gpu: "false"
suffix: ""
platforms: "linux/amd64,linux/arm64"
- tag: lite
variant: lite
gpu: "false"
suffix: "-lite"
platforms: "linux/amd64,linux/arm64"
- tag: cuda
variant: full
gpu: "true"
suffix: "-cuda"
platforms: "linux/amd64"
steps:
- name: Checkout release tag
uses: actions/checkout@v4
@ -104,10 +86,10 @@ jobs:
stirlingimage/stirling-image
ghcr.io/${{ github.repository }}
tags: |
type=semver,pattern={{version}}${{ matrix.suffix }},value=v${{ needs.release.outputs.new_version }}
type=semver,pattern={{major}}.{{minor}}${{ matrix.suffix }},value=v${{ needs.release.outputs.new_version }}
type=semver,pattern={{major}}${{ matrix.suffix }},value=v${{ needs.release.outputs.new_version }}
type=raw,value=${{ matrix.tag == 'full' && 'latest' || matrix.tag }}
type=semver,pattern={{version}},value=v${{ needs.release.outputs.new_version }}
type=semver,pattern={{major}}.{{minor}},value=v${{ needs.release.outputs.new_version }}
type=semver,pattern={{major}},value=v${{ needs.release.outputs.new_version }}
type=raw,value=latest
- name: Build and push
uses: docker/build-push-action@v6
@ -115,11 +97,8 @@ jobs:
context: .
file: docker/Dockerfile
push: true
build-args: |
VARIANT=${{ matrix.variant }}
GPU=${{ matrix.gpu }}
platforms: ${{ matrix.platforms }}
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha,scope=${{ matrix.tag }}
cache-to: type=gha,mode=max,scope=${{ matrix.tag }}
cache-from: type=gha,scope=unified
cache-to: type=gha,mode=max,scope=unified

View file

@ -23,6 +23,14 @@ import { teamsRoutes } from "./routes/teams.js";
import { registerToolRoutes } from "./routes/tools/index.js";
import { userFileRoutes } from "./routes/user-files.js";
// Warn about deprecated STIRLING_VARIANT env var
if (process.env.STIRLING_VARIANT) {
console.warn(
`WARNING: STIRLING_VARIANT="${process.env.STIRLING_VARIANT}" is set but ignored. ` +
"There is now a single unified image with all features. Remove STIRLING_VARIANT from your environment.",
);
}
// Run before anything else
runMigrations();
console.log("Database initialized");
@ -108,12 +116,23 @@ await teamsRoutes(app);
// API docs (Scalar)
await docsRoutes(app);
// Public health check (minimal - no internal details)
app.get("/api/v1/health", async () => ({
status: "healthy",
version: APP_VERSION,
variant: process.env.STIRLING_VARIANT === "lite" ? "lite" : "full",
}));
// Public health check (checks core dependencies).
// Minimal by design: it reports only overall status and version — no internal
// details — and is consumed by the Docker HEALTHCHECK, so it must remain
// unauthenticated and cheap.
app.get("/api/v1/health", async (_request, reply) => {
  let dbOk = false;
  try {
    // Liveness probe: a single-row read against the settings table.
    // NOTE(review): assumes `db` is a synchronous Drizzle client
    // (better-sqlite3 style) where `.get()` executes immediately — confirm.
    db.select().from(schema.settings).limit(1).get();
    dbOk = true;
  } catch {
    /* db unreachable */
  }
  // 503 on DB failure lets orchestrators (Docker healthcheck, load
  // balancers) treat a broken database as an unhealthy container.
  const status = dbOk ? "healthy" : "unhealthy";
  const code = dbOk ? 200 : 503;
  return reply.code(code).send({
    status,
    version: APP_VERSION,
  });
});
// Admin health check (full diagnostics)
app.get("/api/v1/admin/health", async (request, reply) => {
@ -161,12 +180,19 @@ try {
}
// Graceful shutdown
const SHUTDOWN_TIMEOUT_MS = 8000;
let shuttingDown = false;
async function shutdown(signal: string) {
if (shuttingDown) return;
shuttingDown = true;
console.log(`\n${signal} received, shutting down gracefully...`);
const forceExit = setTimeout(() => {
console.error("Shutdown timed out, forcing exit");
process.exit(1);
}, SHUTDOWN_TIMEOUT_MS);
forceExit.unref();
cleanupCron.stop();
try {
@ -199,6 +225,7 @@ async function shutdown(signal: string) {
console.error("Error closing database:", err);
}
clearTimeout(forceExit);
process.exit(0);
}

View file

@ -4,12 +4,8 @@ The `@stirling-image/ai` package wraps Python ML models in TypeScript functions.
All model weights are bundled in the Docker image during the build. No downloads happen at runtime.
::: warning Lite image
AI tools are not available in the `:lite` Docker image. The API returns `501 Not Available` for these endpoints when running the lite variant. Use `:latest` for AI features. See [Docker Tags](/guide/docker-tags) for details.
:::
::: tip GPU acceleration
The `:cuda` Docker image includes GPU-accelerated versions of the ML libraries. Background removal, upscaling, and OCR all benefit from NVIDIA GPU acceleration. The image auto-detects your GPU and falls back to CPU if none is available. See [Docker Tags](/guide/docker-tags) for setup.
The Docker image includes CUDA-accelerated ML libraries on amd64. Add `--gpus all` to your Docker run command to enable GPU acceleration. The image auto-detects your GPU and falls back to CPU if none is available.
:::
## Background removal
@ -73,7 +69,7 @@ Returns the blurred image along with metadata about each detected face region (b
Removes objects from images by filling in the area with generated content that matches the surroundings.
**Model:** [LaMa](https://github.com/advimman/lama) (Large Mask Inpainting)
**Model:** OpenCV TELEA algorithm
Takes an image and a mask (white = area to erase, black = keep). Returns the inpainted image.

View file

@ -34,7 +34,7 @@ Supported operations:
- **Upscaling** -- RealESRGAN
- **OCR** -- PaddleOCR
- **Face detection/blurring** -- MediaPipe
- **Object erasing (inpainting)** -- LaMa Cleaner
- **Object erasing (inpainting)** -- OpenCV
Python scripts live in `packages/ai/python/`. The Docker image pre-downloads all model weights during the build so the container works offline.

View file

@ -1,16 +1,8 @@
# Deployment
Stirling Image ships as a single Docker container. The image supports **linux/amd64** and **linux/arm64**, so it runs natively on Intel/AMD servers, Apple Silicon Macs, and ARM devices like the Raspberry Pi 4/5.
Stirling Image ships as a single Docker container. The image supports **linux/amd64** (with NVIDIA CUDA) and **linux/arm64** (CPU), so it runs natively on Intel/AMD servers, Apple Silicon Macs, and ARM devices like the Raspberry Pi 4/5.
Three variants are available:
| Variant | Tag | Size | What's included |
|---------|-----|------|-----------------|
| Full | `:latest` | ~11 GB | All tools + AI/ML (background removal, upscaling, OCR, face blur, object eraser) |
| Lite | `:lite` | ~1.5 GB | All image processing tools, no AI/ML |
| CUDA | `:cuda` | ~14 GB | Full + GPU-accelerated AI (NVIDIA only, amd64) |
See [Docker Tags](./docker-tags) for the full comparison, Docker Compose examples, and version pinning.
See [Docker Image](./docker-tags) for GPU setup, Docker Compose examples, and version pinning.
## Docker Compose (recommended)
@ -64,14 +56,14 @@ Everything runs from a single process. The Fastify server handles API requests a
- RealESRGAN (upscaling)
- PaddleOCR (text recognition)
- MediaPipe (face detection)
- LaMa Cleaner (inpainting/object removal)
- OpenCV (inpainting/object removal)
- onnxruntime, opencv-python, Pillow, numpy
Model weights are downloaded at build time, so the container works fully offline. The lite image (`:lite`) skips all Python packages and model downloads.
Model weights are downloaded at build time, so the container works fully offline.
### Architecture notes
All core image tools (resize, crop, compress, convert, watermark, etc.) work on both amd64 and arm64. Some ML packages (PaddleOCR, MediaPipe, LaMa Cleaner) have limited arm64 support and may be unavailable on ARM systems. The container logs a warning for any package that could not be installed and falls back gracefully — Tesseract handles OCR and Lanczos handles upscaling when the ML alternatives are missing.
All tools work on both amd64 and arm64. AI tools (background removal, upscaling, OCR, face detection) use CUDA-accelerated packages on amd64 and CPU packages on arm64. GPU acceleration is auto-detected at runtime when `--gpus all` is passed.
## Volumes
@ -120,7 +112,7 @@ Set `client_max_body_size` to match your `MAX_UPLOAD_SIZE_MB` value.
The GitHub repository has two workflows:
- **release.yml** -- On release, builds multi-arch Docker images (amd64 + arm64) for both the full and lite variants, and pushes to Docker Hub (`stirlingimage/stirling-image`) and GitHub Container Registry (`ghcr.io/stirling-image/stirling-image`).
- **release.yml** -- On release, builds a multi-arch Docker image (amd64 + arm64), and pushes to Docker Hub (`stirlingimage/stirling-image`) and GitHub Container Registry (`ghcr.io/stirling-image/stirling-image`).
- **deploy-docs.yml** -- Builds this documentation site and deploys it to GitHub Pages.
Both run automatically. No manual steps needed after merging to `main`.

View file

@ -1,53 +1,28 @@
# Docker Image Tags
# Docker Image
Stirling Image ships three Docker image variants to fit different use cases.
Stirling Image ships as a single Docker image that works on all platforms.
## Full (default)
## Quick start
```bash
docker pull stirlingimage/stirling-image:latest
docker run -d -p 1349:1349 -v stirling-data:/data stirlingimage/stirling-image:latest
```
Includes all tools: image processing, AI-powered background removal, upscaling, face blurring, object erasing, and OCR. Size is ~11 GB due to bundled ML models.
The app is available at `http://localhost:1349`.
## Lite
## GPU acceleration
The image includes CUDA support on amd64. If you have an NVIDIA GPU with the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed, add `--gpus all`:
```bash
docker pull stirlingimage/stirling-image:lite
docker run -d --gpus all -p 1349:1349 -v stirling-data:/data stirlingimage/stirling-image:latest
```
Includes all image processing tools (resize, crop, rotate, convert, compress, watermark, collage, and 20+ more) but excludes AI/ML tools. Size is ~1-2 GB.
Use this if you:
- Only need standard image processing (no AI features)
- Are running on constrained hardware (Raspberry Pi, small VPS)
- Want faster pulls and smaller disk footprint
### Tools excluded from lite
| Tool | What it does |
|------|-------------|
| Remove Background | AI-powered background removal |
| Upscale | AI super-resolution upscaling |
| Blur Faces | AI face detection and blurring |
| Erase Object | AI inpainting to remove objects |
| OCR | Optical character recognition |
All other tools (27+) work identically in both variants.
## CUDA (GPU acceleration)
```bash
docker pull stirlingimage/stirling-image:cuda
```
Same tools as the full image, but built with GPU-accelerated Python packages (onnxruntime-gpu, PyTorch CUDA, PaddlePaddle GPU). The image auto-detects your NVIDIA GPU at runtime and falls back to CPU if none is found.
Requires [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) on the host. Linux amd64 only.
The image auto-detects your GPU at runtime. Without `--gpus all`, it runs on CPU. Same image either way.
### Benchmarks
Tested on an NVIDIA RTX 4070 (12 GB VRAM) with a 572x1024 JPEG portrait. Both images ran on the same machine. "Warm" means the model is already loaded in memory (second request onward).
Tested on an NVIDIA RTX 4070 (12 GB VRAM) with a 572x1024 JPEG portrait.
#### Warm performance
@ -68,10 +43,6 @@ Tested on an NVIDIA RTX 4070 (12 GB VRAM) with a 572x1024 JPEG portrait. Both im
| Upscale 2x | 3,957ms | 2,318ms | 1.7x |
| OCR (PaddleOCR) | 1,469ms | 1,090ms | 1.3x |
Cold start includes loading the model into memory. GPU cold starts are faster because CUDA parallelizes the model loading.
Larger images show bigger speedups, especially for upscaling. Non-AI tools (resize, crop, convert, etc.) are unaffected since they use Sharp (CPU-based).
### GPU health check
After the first AI request, the admin health endpoint reports GPU status:
@ -83,8 +54,6 @@ GET /api/v1/admin/health
## Docker Compose
### Full
```yaml
services:
stirling-image:
@ -94,35 +63,24 @@ services:
volumes:
- stirling-data:/data
- stirling-workspace:/tmp/workspace
restart: unless-stopped
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
volumes:
stirling-data:
stirling-workspace:
```
### Lite
For GPU acceleration via Docker Compose, add the deploy section:
```yaml
services:
stirling-image:
image: stirlingimage/stirling-image:lite
ports:
- "1349:1349"
volumes:
- stirling-data:/data
- stirling-workspace:/tmp/workspace
volumes:
stirling-data:
stirling-workspace:
```
### CUDA
```yaml
services:
stirling-image:
image: stirlingimage/stirling-image:cuda
image: stirlingimage/stirling-image:latest
ports:
- "1349:1349"
volumes:
@ -135,35 +93,34 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
restart: unless-stopped
volumes:
stirling-data:
stirling-workspace:
```
## Switching from lite to full
To upgrade from lite to full and unlock AI tools:
1. Stop your container
2. Pull the full image: `docker pull stirlingimage/stirling-image:latest`
3. Update your compose file or run command to use `:latest` instead of `:lite`
4. Start the container
Your data and settings are preserved in the volumes.
## Version pinning
Both variants support semver tags for pinning:
| Tag | Description |
|-----|------------|
| `latest` | Latest full release |
| `lite` | Latest lite release |
| `cuda` | Latest full release with GPU support |
| `1.6.0` | Exact full version |
| `1.6.0-lite` | Exact lite version |
| `1.6.0-cuda` | Exact CUDA version |
| `1.6` | Latest patch in 1.6.x (full) |
| `1.6-lite` | Latest patch in 1.6.x (lite) |
| `1.6-cuda` | Latest patch in 1.6.x (CUDA) |
| `latest` | Latest release |
| `1.11.0` | Exact version |
| `1.11` | Latest patch in 1.11.x |
| `1` | Latest minor in 1.x |
## Platforms
| Architecture | GPU support | Notes |
|---|---|---|
| linux/amd64 | NVIDIA CUDA | Full GPU acceleration for AI tools |
| linux/arm64 | CPU only | Raspberry Pi 4/5, Apple Silicon via Docker Desktop |
## Migration from previous tags
If you were using `:lite` or `:cuda` tags, switch to `:latest`:
- **From `:lite`**: Pull `:latest`. You now have all AI tools included.
- **From `:cuda`**: Pull `:latest` and keep `--gpus all`. Same GPU support, unified image.
Your data and settings are preserved in the volumes.

View file

@ -1,12 +1,9 @@
# syntax=docker/dockerfile:1
# ============================================
# Stirling Image - Production Dockerfile
# Multi-stage build for single-container deployment
# Stirling Image - Unified Production Dockerfile
# Single image: GPU auto-detected on amd64, CPU on arm64
# ============================================
ARG VARIANT=full
ARG GPU=false
# ============================================
# Stage 1: Build the frontend (Vite + React)
# ============================================
@ -39,25 +36,22 @@ RUN --mount=type=cache,id=turbo-cache,target=/app/.turbo \
pnpm --filter @stirling-image/web build
# ============================================
# Stage 2: Base image selection
# Stage 2: Platform-specific base images
# ============================================
FROM node:22-bookworm AS base-cpu
FROM nvidia/cuda:12.6.3-runtime-ubuntu24.04 AS base-gpu
# Select base: GPU=false -> base-cpu, GPU=true -> base-gpu
FROM base-cpu AS production-base-false
FROM base-gpu AS production-base-true
FROM node:22-bookworm AS base-linux-arm64
FROM nvidia/cuda:12.6.3-runtime-ubuntu24.04 AS base-linux-amd64
# ============================================
# Stage 3: Production runtime
# ============================================
FROM production-base-${GPU} AS production
ARG TARGETOS
ARG TARGETARCH
FROM base-${TARGETOS}-${TARGETARCH} AS production
ARG VARIANT
ARG GPU
ARG TARGETARCH
# Install Node.js when using CUDA base (node:22-bookworm already has it)
RUN if [ "$GPU" = "true" ]; then \
# Install Node.js on amd64 (CUDA base has no Node; arm64 base already has it)
RUN if [ "$TARGETARCH" = "amd64" ]; then \
apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates gnupg && \
mkdir -p /etc/apt/keyrings && \
@ -71,67 +65,65 @@ RUN if [ "$GPU" = "true" ]; then \
RUN corepack enable && corepack prepare pnpm@9.15.4 --activate
# System dependencies shared by all variants
# System dependencies (all platforms)
RUN apt-get update && apt-get install -y --no-install-recommends \
tini \
imagemagick \
libraw-dev \
potrace \
curl \
gosu \
libheif-examples \
python3 python3-pip python3-venv python3-dev \
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra tesseract-ocr-spa \
build-essential \
libgl1 libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Python/ML system dependencies (full variant only)
RUN if [ "$VARIANT" = "full" ]; then \
apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip python3-venv python3-dev \
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra tesseract-ocr-spa \
build-essential \
libgl1 libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/* \
# Python venv - Layer 1: Base packages (rarely change, ~3 GB)
RUN python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --upgrade pip && \
/opt/venv/bin/pip install \
Pillow==11.1.0 \
numpy==1.26.4 \
opencv-python-headless==4.10.0.84
# Platform-conditional ONNX runtime
RUN if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install onnxruntime-gpu==1.20.1 \
; else \
/opt/venv/bin/pip install onnxruntime==1.20.1 \
; fi
# Python venv + ML packages + model weights (full variant only)
COPY packages/ai/python/requirements.txt /tmp/requirements.txt
COPY packages/ai/python/requirements-gpu.txt /tmp/requirements-gpu.txt
RUN if [ "$VARIANT" = "full" ]; then \
python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --upgrade pip && \
if [ "$GPU" = "true" ]; then \
/opt/venv/bin/pip install \
Pillow numpy opencv-python-headless onnxruntime-gpu && \
(/opt/venv/bin/pip install rembg || echo "WARNING: rembg not installed") && \
(/opt/venv/bin/pip install realesrgan \
--extra-index-url https://download.pytorch.org/whl/cu126 \
|| echo "WARNING: realesrgan not installed") && \
(/opt/venv/bin/pip install paddlepaddle-gpu paddleocr || echo "WARNING: PaddleOCR not installed") && \
(/opt/venv/bin/pip install mediapipe || echo "WARNING: mediapipe not installed") && \
(/opt/venv/bin/pip install lama-cleaner || echo "WARNING: lama-cleaner not installed") && \
(/opt/venv/bin/pip install seam-carving || echo "WARNING: seam-carving not installed") \
; else \
/opt/venv/bin/pip install \
Pillow numpy opencv-python-headless onnxruntime && \
(/opt/venv/bin/pip install "rembg[cpu]" || echo "WARNING: rembg not installed") && \
(/opt/venv/bin/pip install realesrgan || echo "WARNING: realesrgan not installed") && \
(/opt/venv/bin/pip install paddlepaddle paddleocr || echo "WARNING: PaddleOCR not installed") && \
(/opt/venv/bin/pip install mediapipe || echo "WARNING: mediapipe not installed") && \
(/opt/venv/bin/pip install lama-cleaner || echo "WARNING: lama-cleaner not installed") && \
(/opt/venv/bin/pip install seam-carving || echo "WARNING: seam-carving not installed") \
; fi \
; fi && rm -f /tmp/requirements.txt /tmp/requirements-gpu.txt
# Python venv - Layer 2: Tool packages (change occasionally, ~2 GB)
RUN if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install rembg==2.0.62 && \
/opt/venv/bin/pip install realesrgan==0.3.0 \
--extra-index-url https://download.pytorch.org/whl/cu126 && \
/opt/venv/bin/pip install paddlepaddle-gpu==3.0.0 \
--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ && \
/opt/venv/bin/pip install paddleocr==2.9.1 \
; else \
/opt/venv/bin/pip install "rembg[cpu]==2.0.62" && \
/opt/venv/bin/pip install realesrgan==0.3.0 && \
/opt/venv/bin/pip install paddlepaddle==3.0.0 paddleocr==2.9.1 \
; fi
# mediapipe 0.10.21 only has amd64 wheels; arm64 maxes out at 0.10.18
RUN if [ "$TARGETARCH" = "amd64" ]; then \
/opt/venv/bin/pip install mediapipe==0.10.21 \
; else \
/opt/venv/bin/pip install mediapipe==0.10.18 \
; fi
RUN /opt/venv/bin/pip install seam-carving==1.1.0
# Pre-download and verify all ML models
# Note: on amd64, paddlepaddle-gpu can't import without the CUDA driver (only
# available at runtime). The download script gracefully skips PaddleOCR model
# pre-download in this case; models download on first use at runtime instead.
COPY docker/download_models.py /tmp/download_models.py
RUN if [ "$VARIANT" = "full" ]; then \
/opt/venv/bin/python3 /tmp/download_models.py && \
/opt/venv/bin/python3 -c "\
try: \
from paddleocr import PaddleOCR; \
print('Downloading PaddleOCR models...'); \
ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False); \
print('PaddleOCR models ready'); \
except: print('PaddleOCR model pre-download skipped') \
" 2>/dev/null || echo "WARNING: Could not pre-download PaddleOCR models" \
; fi && rm -f /tmp/download_models.py
RUN /opt/venv/bin/python3 /tmp/download_models.py && rm -f /tmp/download_models.py
WORKDIR /app
@ -151,10 +143,8 @@ RUN --mount=type=cache,id=pnpm-store,target=/root/.local/share/pnpm/store/v3 \
pnpm install --frozen-lockfile --prod
# Remove build tools no longer needed in production
RUN if [ "$VARIANT" = "full" ]; then \
apt-get purge -y --auto-remove build-essential python3-dev && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN apt-get purge -y --auto-remove build-essential python3-dev && \
rm -rf /var/lib/apt/lists/*
# Copy source code for API (tsx runs TS directly - no build step needed)
COPY apps/api/src ./apps/api/src
@ -170,9 +160,9 @@ COPY packages/ai/python ./packages/ai/python
COPY --from=builder /app/apps/web/dist ./apps/web/dist
# Create required directories
RUN mkdir -p /data /tmp/workspace
RUN mkdir -p /data /data/files /tmp/workspace
# Environment defaults (matching PRD Section 16.1)
# Environment defaults
ENV PORT=1349 \
NODE_ENV=production \
AUTH_ENABLED=true \
@ -181,6 +171,7 @@ ENV PORT=1349 \
STORAGE_MODE=local \
DB_PATH=/data/stirling.db \
WORKSPACE_PATH=/tmp/workspace \
FILES_STORAGE_PATH=/data/files \
PYTHON_VENV_PATH=/opt/venv \
DEFAULT_THEME=light \
DEFAULT_LOCALE=en \
@ -191,13 +182,20 @@ ENV PORT=1349 \
MAX_BATCH_SIZE=200 \
CONCURRENT_JOBS=3 \
MAX_MEGAPIXELS=100 \
RATE_LIMIT_PER_MIN=100 \
STIRLING_VARIANT=${VARIANT}
RATE_LIMIT_PER_MIN=100
# NVIDIA Container Toolkit env vars (harmless on non-GPU systems)
ENV NVIDIA_VISIBLE_DEVICES=all \
NVIDIA_DRIVER_CAPABILITIES=compute,utility
# Suppress noisy ML library output in docker logs
ENV PYTHONWARNINGS=ignore \
TF_CPP_MIN_LOG_LEVEL=3 \
PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True
# Create non-root user for runtime
RUN groupadd -r stirling && useradd -r -g stirling -d /app -s /sbin/nologin stirling
RUN chown -R stirling:stirling /app /data /tmp/workspace && \
([ -d /opt/venv ] && chown -R stirling:stirling /opt/venv || true)
RUN chown -R stirling:stirling /app /data /tmp/workspace /opt/venv
# Entrypoint fixes volume permissions then drops to stirling via gosu
COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh
@ -205,8 +203,9 @@ RUN chmod +x /usr/local/bin/entrypoint.sh
EXPOSE 1349
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
CMD curl -f http://localhost:1349/api/v1/health || exit 1
ENTRYPOINT ["entrypoint.sh"]
# tini as PID 1 for zombie reaping + signal forwarding
ENTRYPOINT ["tini", "--", "entrypoint.sh"]
CMD ["npx", "tsx", "apps/api/src/index.ts"]

View file

@ -1,17 +0,0 @@
# GPU override - use with:
# docker compose -f docker/docker-compose.yml -f docker/docker-compose.gpu.yml up
services:
stirling-image:
build:
context: ..
dockerfile: docker/Dockerfile
args:
GPU: "true"
image: stirlingimage/stirling-image:cuda
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]

View file

@ -1,32 +1,25 @@
name: stirling-image
services:
stirling-image:
build:
context: ..
dockerfile: docker/Dockerfile
image: stirlingimage/stirling-image:latest
container_name: stirling-image
ports:
- "1349:1349"
volumes:
- stirling-data:/data
- stirling-workspace:/tmp/workspace
environment:
- AUTH_ENABLED=true
- DEFAULT_USERNAME=admin
- DEFAULT_PASSWORD=admin
- STORAGE_MODE=local
- FILE_MAX_AGE_HOURS=24
- CLEANUP_INTERVAL_MINUTES=30
- MAX_UPLOAD_SIZE_MB=100
- MAX_BATCH_SIZE=200
- CONCURRENT_JOBS=3
- MAX_MEGAPIXELS=100
- RATE_LIMIT_PER_MIN=100
- DEFAULT_THEME=light
- DEFAULT_LOCALE=en
- APP_NAME=Stirling Image
volumes:
- stirling-data:/data
- stirling-workspace:/tmp/workspace
restart: unless-stopped
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
volumes:
stirling-data:

View file

@ -1,7 +1,26 @@
"""Pre-download all rembg models offered in the UI."""
import sys
"""Pre-download and verify all ML models for the Docker image.
MODELS = [
This script runs at Docker build time. Any failure exits non-zero,
failing the build. No silent fallbacks.
"""
import os
import sys
import urllib.request
# Force CPU mode during build - no GPU driver available at build time.
# Must be set before any ML library import.
os.environ["PADDLE_DEVICE"] = "cpu"
os.environ["FLAGS_use_cuda"] = "0"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
REALESRGAN_MODEL_DIR = "/opt/models/realesrgan"
REALESRGAN_MODEL_URL = (
"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"
)
REALESRGAN_MODEL_PATH = os.path.join(REALESRGAN_MODEL_DIR, "RealESRGAN_x4plus.pth")
REALESRGAN_MIN_SIZE = 60_000_000 # ~67 MB
REMBG_MODELS = [
"u2net",
"isnet-general-use",
"bria-rmbg",
@ -10,18 +29,116 @@ MODELS = [
"birefnet-general",
]
try:
from rembg import new_session
except ImportError:
print("WARNING: rembg not installed, skipping model pre-download")
sys.exit(0)
# PaddleOCR language codes (not ISO). German/French/Spanish use "latin" model.
# Valid keys: ch, en, korean, japan, chinese_cht, ta, te, ka, latin, arabic, cyrillic, devanagari
PADDLEOCR_LANGUAGES = ["en", "ch", "japan", "korean", "latin"]
for model in MODELS:
print(f"Downloading {model}...")
try:
def download_rembg_models():
"""Download all rembg ONNX models."""
print("=== Downloading rembg models ===")
from rembg import new_session
for model in REMBG_MODELS:
print(f" Downloading {model}...")
new_session(model)
print(f" {model} ready")
except Exception as e:
print(f" WARNING: {model} failed: {e}")
print(f"All {len(REMBG_MODELS)} rembg models downloaded.\n")
print("Model pre-download complete")
def download_realesrgan_model():
    """Download the RealESRGAN_x4plus.pth pretrained weights.

    Fetches the upscaler weights from the official Real-ESRGAN GitHub release
    into REALESRGAN_MODEL_DIR and verifies the file size so a truncated or
    failed download fails the Docker build instead of surfacing at runtime.

    Raises:
        RuntimeError: if the downloaded file is smaller than
            REALESRGAN_MIN_SIZE (i.e. the download was truncated).
    """
    print("=== Downloading RealESRGAN model ===")
    os.makedirs(REALESRGAN_MODEL_DIR, exist_ok=True)
    print(f"  Downloading from {REALESRGAN_MODEL_URL}...")
    urllib.request.urlretrieve(REALESRGAN_MODEL_URL, REALESRGAN_MODEL_PATH)
    size = os.path.getsize(REALESRGAN_MODEL_PATH)
    # Explicit raise rather than `assert`: asserts are stripped under
    # `python -O`, which would silently disable this integrity check even
    # though the module docstring promises "any failure exits non-zero".
    if size <= REALESRGAN_MIN_SIZE:
        raise RuntimeError(
            f"RealESRGAN model too small: {size} bytes (expected > {REALESRGAN_MIN_SIZE})"
        )
    print(f"  RealESRGAN_x4plus.pth downloaded ({size / 1_000_000:.1f} MB)\n")
def download_paddleocr_models():
    """Pre-download PaddleOCR models for all supported languages.

    On amd64 the image ships paddlepaddle-gpu, which cannot even be imported
    without a CUDA driver; drivers exist only at container runtime, not at
    Docker build time. In that case the pre-download is skipped and models are
    fetched lazily on first use. Any other ImportError is a real packaging
    problem and is re-raised to fail the build.
    """
    print("=== Downloading PaddleOCR models ===")
    try:
        from paddleocr import PaddleOCR
    except ImportError as e:
        if "libcuda" in str(e):
            # paddlepaddle-gpu can't import without the CUDA driver at build
            # time. Models will be downloaded on first use at runtime instead.
            # (Fixed: these two strings had stray `f` prefixes with no
            # placeholders — lint F541.)
            print("  Skipping PaddleOCR model pre-download (no CUDA driver at build time)")
            print("  Models will download on first use at runtime.\n")
            return
        raise
    for lang in PADDLEOCR_LANGUAGES:
        print(f"  Downloading models for lang={lang}...")
        # Instantiating PaddleOCR triggers the model download for that language.
        PaddleOCR(lang=lang, use_gpu=False, show_log=False)
        print(f"  {lang} ready")
    print(f"All {len(PADDLEOCR_LANGUAGES)} PaddleOCR languages downloaded.\n")
def verify_mediapipe():
    """Check that the MediaPipe face-detection models ship inside the wheel.

    Builds a FaceDetection solution for each supported model_selection value
    and closes it again; construction fails if the bundled model is missing.
    """
    print("=== Verifying MediaPipe models ===")
    import mediapipe as mp

    face_detection_cls = mp.solutions.face_detection.FaceDetection
    for selection, label in ((0, "short-range"), (1, "full-range")):
        print(f"  Verifying {label} model (selection={selection})...")
        face_detection_cls(
            model_selection=selection, min_detection_confidence=0.5
        ).close()
        print(f"  {label} model OK")
    print("MediaPipe models verified.\n")
def smoke_test():
    """Final verification that all ML libraries and models are loadable.

    GPU-dependent libraries (paddlepaddle-gpu, torch CUDA) cannot be imported
    at build time because the CUDA driver is only available at runtime. We
    verify CPU-only imports and check that model files exist on disk.

    Raises:
        ImportError: if any CPU-side ML package failed to install.
        RuntimeError: if the RealESRGAN weights are missing or truncated.
    """
    print("=== Running smoke test ===")
    # CPU-only imports that work on all platforms at build time. The imports
    # themselves are the test: a broken install raises ImportError here and
    # fails the build, so the "unused" names are intentional.
    from PIL import Image  # noqa: F401
    import cv2  # noqa: F401
    import numpy  # noqa: F401
    import seam_carving  # noqa: F401
    from rembg import new_session  # noqa: F401
    print("  CPU imports OK (Pillow, cv2, numpy, seam_carving, rembg)")
    # MediaPipe is CPU-only, should always import
    import mediapipe as mp  # noqa: F401
    print("  MediaPipe import OK")
    # RealESRGAN model file must exist and look complete. Explicit raises
    # instead of `assert` so the checks survive `python -O` (asserts are
    # stripped under optimization, which would defeat the build gate).
    if not os.path.exists(REALESRGAN_MODEL_PATH):
        raise RuntimeError(f"RealESRGAN model missing: {REALESRGAN_MODEL_PATH}")
    if os.path.getsize(REALESRGAN_MODEL_PATH) <= REALESRGAN_MIN_SIZE:
        raise RuntimeError("RealESRGAN model file is too small")
    print("  RealESRGAN model file verified")
    print("Smoke test passed.\n")
def main():
    """Run every download/verification step in order; any failure propagates
    and exits the build non-zero."""
    print("Pre-downloading all ML models...\n")
    steps = (
        download_rembg_models,
        download_realesrgan_model,
        download_paddleocr_models,
        verify_mediapipe,
        smoke_test,
    )
    for step in steps:
        step()
    print("All models downloaded and verified.")


if __name__ == "__main__":
    main()

View file

@ -33,23 +33,39 @@ def run_tesseract(input_path, language):
def run_paddleocr(input_path, language):
"""Run PaddleOCR."""
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
from paddleocr import PaddleOCR
from gpu import gpu_available
emit_progress(20, "Loading")
ocr = PaddleOCR(lang=language, use_gpu=gpu_available())
emit_progress(30, "Scanning")
result = ocr.ocr(input_path)
emit_progress(70, "Extracting text")
text = "\n".join(
[
line[1][0]
for res in result
if res
for line in res
if line and line[1]
]
)
# Redirect stdout to stderr so PaddleOCR download/init messages
# cannot contaminate our JSON result on stdout.
stdout_fd = os.dup(1)
os.dup2(2, 1)
try:
from paddleocr import PaddleOCR
from gpu import gpu_available
# Map API language codes to PaddleOCR codes
paddle_lang_map = {"en": "en", "de": "latin", "fr": "latin", "es": "latin", "zh": "ch", "ja": "japan", "ko": "korean"}
paddle_lang = paddle_lang_map.get(language, "en")
emit_progress(20, "Loading")
ocr = PaddleOCR(lang=paddle_lang, use_gpu=gpu_available(), show_log=False)
emit_progress(30, "Scanning")
result = ocr.ocr(input_path)
emit_progress(70, "Extracting text")
text = "\n".join(
[
line[1][0]
for res in result
if res
for line in res
if line and line[1]
]
)
finally:
# Restore stdout
os.dup2(stdout_fd, 1)
os.close(stdout_fd)
return text, "paddleocr"

View file

@ -1,6 +1,5 @@
rembg==2.0.62
realesrgan==0.3.0
lama-cleaner==1.2.5
paddleocr==2.9.1
paddlepaddle-gpu==3.0.0
mediapipe==0.10.21
@ -8,3 +7,4 @@ onnxruntime-gpu==1.20.1
numpy==1.26.4
Pillow==11.1.0
opencv-python-headless==4.10.0.84
seam-carving==1.1.0

View file

@ -1,6 +1,5 @@
rembg[cpu]==2.0.62
realesrgan==0.3.0
lama-cleaner==1.2.5
paddleocr==2.9.1
paddlepaddle==3.0.0
mediapipe==0.10.21

View file

@ -1,6 +1,7 @@
"""Image upscaling with Real-ESRGAN fallback to Lanczos."""
import sys
import json
import os
def emit_progress(percent, stage):
@ -8,6 +9,12 @@ def emit_progress(percent, stage):
print(json.dumps({"progress": percent, "stage": stage}), file=sys.stderr, flush=True)
REALESRGAN_MODEL_PATH = os.environ.get(
"REALESRGAN_MODEL_PATH",
"/opt/models/realesrgan/RealESRGAN_x4plus.pth",
)
def main():
input_path = sys.argv[1]
output_path = sys.argv[2]
@ -24,26 +31,40 @@ def main():
# Try Real-ESRGAN first
try:
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan import RealESRGANer
from gpu import gpu_available
import numpy as np
import torch
# Redirect stdout to stderr so basicsr/realesrgan init messages
# cannot contaminate our JSON result on stdout.
stdout_fd = os.dup(1)
os.dup2(2, 1)
try:
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan import RealESRGANer
from gpu import gpu_available
import numpy as np
import torch
finally:
# Restore stdout after imports
os.dup2(stdout_fd, 1)
os.close(stdout_fd)
if not os.path.exists(REALESRGAN_MODEL_PATH):
raise FileNotFoundError(f"RealESRGAN model not found: {REALESRGAN_MODEL_PATH}")
use_gpu = gpu_available()
device = torch.device("cuda" if use_gpu else "cpu")
# RealESRGAN_x4plus is a 4x model internally
model = RRDBNet(
num_in_ch=3,
num_out_ch=3,
num_feat=64,
num_block=23,
num_grow_ch=32,
scale=scale,
scale=4,
)
upsampler = RealESRGANer(
scale=scale,
model_path=None,
scale=4,
model_path=REALESRGAN_MODEL_PATH,
model=model,
half=use_gpu,
device=device,
@ -57,8 +78,8 @@ def main():
emit_progress(95, "Saving result")
result.save(output_path)
method = "realesrgan"
except (ImportError, Exception):
# Fallback to Lanczos upscaling
except (ImportError, FileNotFoundError, RuntimeError, OSError):
# RealESRGAN unavailable or failed - fall back to Lanczos
emit_progress(50, "Upscaling with Lanczos")
img_upscaled = img.resize(new_size, Image.LANCZOS)
emit_progress(95, "Saving result")