fix: resolve runtime model path mismatch for non-root Docker user

Set U2NET_HOME=/opt/models/rembg so that rembg models pre-downloaded at build time as root are found at runtime by the non-root ashim user. Without this, every fresh container re-downloaded the 973 MB BiRefNet models on the first background-removal request. Apply the same fix to PaddleOCR: download to /opt/models/paddlex and symlink into both /root/.paddlex and /app/.paddlex so that PaddleX finds the models regardless of which HOME gosu resolves at runtime. Fall back to per-request spawning in bridge.ts when the persistent dispatcher crashes mid-request (e.g. OOM while loading a large ONNX model), so that the operation succeeds instead of surfacing "Python dispatcher exited unexpectedly" to the user. Improve the entrypoint.sh permission warning to mention Windows bind mounts as the likely cause.
2026-04-21 13:37:52 +00:00 · 2026-04-16 23:45:02 +08:00 · 2026-04-16 23:45:02 +08:00 · f28792a5ed
commit f28792a5ed
parent e516cee836
4 changed files with 24 additions and 4 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@ -205,6 +205,11 @@ RUN /opt/venv/bin/pip install --no-deps codeformer-pip==0.0.4 lpips
 # Re-pin numpy to 1.26.4 in case any transitive dep upgraded it
 RUN /opt/venv/bin/pip install numpy==1.26.4

+# Pin rembg model storage to a fixed path so models downloaded at build time
+# (as root) are found at runtime (as the non-root ashim user, home=/app).
+# Without this, rembg defaults to ~/.u2net which differs between users.
+ENV U2NET_HOME=/opt/models/rembg
+
 # Pre-download and verify all ML models
 # Note: on amd64, paddlepaddle-gpu can't import without the CUDA driver (only
 # available at runtime). The download script gracefully skips PaddleOCR model
@ -216,7 +221,14 @@ RUN if [ "$SKIP_MODEL_DOWNLOADS" = "true" ]; then \
        echo "Skipping model downloads (CI build)"; \
    else \
        /opt/venv/bin/python3 /tmp/download_models.py; \
-    fi && rm -f /tmp/download_models.py
+    fi && rm -f /tmp/download_models.py && \
+    # Symlink PaddleX model dir into both possible HOME locations so models are
+    # found regardless of whether HOME=/root (build/root context) or HOME=/app
+    # (runtime ashim user via gosu). Without this PaddleX re-downloads on every
+    # fresh container start.
+    mkdir -p /opt/models/paddlex/official_models /root/.paddlex /app/.paddlex && \
+    ln -sf /opt/models/paddlex/official_models /root/.paddlex/official_models && \
+    ln -sf /opt/models/paddlex/official_models /app/.paddlex/official_models

 WORKDIR /app

--- a/docker/download_models.py
+++ b/docker/download_models.py
@ -174,7 +174,7 @@ PADDLEOCR_MODELS = [
 PADDLEOCR_VL_MODEL = "PaddlePaddle/PaddleOCR-VL-1.5"

 # PaddleX stores models here by default
-PADDLEX_MODEL_DIR = os.path.expanduser("~/.paddlex/official_models")
+PADDLEX_MODEL_DIR = "/opt/models/paddlex/official_models"


 def _register_birefnet_matting():
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@ -11,7 +11,7 @@ export DEFAULT_PASSWORD="${DEFAULT_PASSWORD:-admin}"
 # This runs as root, fixes permissions, then drops to ashim via gosu.
 if [ "$(id -u)" = "0" ]; then
  chown -R ashim:ashim /data /tmp/workspace 2>&1 || \
-    echo "WARNING: Could not fix volume permissions. If processing fails, check your volume mount permissions." >&2
+    echo "WARNING: Could not fix volume permissions. Use named volumes (not Windows bind mounts) to avoid this. See docs for details." >&2
  exec gosu ashim "$@"
 fi

--- a/packages/ai/src/bridge.ts
+++ b/packages/ai/src/bridge.ts
@ -356,7 +356,15 @@ export function runPythonWithProgress(
  // Try persistent dispatcher first
  const dispatcherPromise = dispatcherRun(scriptName, args, options);
  if (dispatcherPromise) {
-    return dispatcherPromise;
+    return dispatcherPromise.catch((err: Error) => {
+      // Dispatcher crashed mid-request (e.g. OOM when loading a large model).
+      // Retry in an isolated per-request process which starts clean and has
+      // more available memory than the warm dispatcher.
+      if (err.message === "Python dispatcher exited unexpectedly") {
+        return runPythonPerRequest(scriptName, args, options);
+      }
+      throw err;
+    });
  }

  // Fall back to per-request spawning