fix: resolve runtime model path mismatch for non-root Docker user

Set U2NET_HOME=/opt/models/rembg so rembg models pre-downloaded at
build time as root are found at runtime by the non-root ashim user.
Without this, every fresh container re-downloaded the 973 MB BiRefNet
models on the first background-removal request.

Apply the same fix to PaddleOCR: download to
/opt/models/paddlex/official_models and symlink that directory as
official_models under both /root/.paddlex and /app/.paddlex, so PaddleX
finds the models regardless of which HOME gosu resolves at runtime.

Fall back to per-request spawning in bridge.ts when the persistent
dispatcher crashes mid-request (e.g. OOM loading a large ONNX model),
so the operation is retried in a fresh process instead of immediately
surfacing "Python dispatcher exited unexpectedly" to the user.

Improve entrypoint.sh permission warning to mention Windows bind mounts
as the likely cause.
This commit is contained in:
ashim-hq 2026-04-16 23:45:02 +08:00
parent e516cee836
commit f28792a5ed
4 changed files with 24 additions and 4 deletions

View file

@ -205,6 +205,11 @@ RUN /opt/venv/bin/pip install --no-deps codeformer-pip==0.0.4 lpips
# Re-pin numpy to 1.26.4 in case any transitive dep upgraded it
RUN /opt/venv/bin/pip install numpy==1.26.4
# Pin rembg model storage to a fixed path so models downloaded at build time
# (as root) are found at runtime (as the non-root ashim user, home=/app).
# Without this, rembg defaults to ~/.u2net which differs between users.
ENV U2NET_HOME=/opt/models/rembg
# Pre-download and verify all ML models
# Note: on amd64, paddlepaddle-gpu can't import without the CUDA driver (only
# available at runtime). The download script gracefully skips PaddleOCR model
@ -216,7 +221,14 @@ RUN if [ "$SKIP_MODEL_DOWNLOADS" = "true" ]; then \
echo "Skipping model downloads (CI build)"; \
else \
/opt/venv/bin/python3 /tmp/download_models.py; \
fi && rm -f /tmp/download_models.py
fi && rm -f /tmp/download_models.py && \
# Symlink PaddleX model dir into both possible HOME locations so models are
# found regardless of whether HOME=/root (build/root context) or HOME=/app
# (runtime ashim user via gosu). Without this PaddleX re-downloads on every
# fresh container start.
mkdir -p /opt/models/paddlex/official_models /root/.paddlex /app/.paddlex && \
ln -sf /opt/models/paddlex/official_models /root/.paddlex/official_models && \
ln -sf /opt/models/paddlex/official_models /app/.paddlex/official_models
WORKDIR /app

View file

@ -174,7 +174,7 @@ PADDLEOCR_MODELS = [
PADDLEOCR_VL_MODEL = "PaddlePaddle/PaddleOCR-VL-1.5"
# Fixed PaddleX model directory. The Docker image symlinks
# ~/.paddlex/official_models (PaddleX's default location) to this path.
PADDLEX_MODEL_DIR = os.path.expanduser("~/.paddlex/official_models")
PADDLEX_MODEL_DIR = "/opt/models/paddlex/official_models"
def _register_birefnet_matting():

View file

@ -11,7 +11,7 @@ export DEFAULT_PASSWORD="${DEFAULT_PASSWORD:-admin}"
# This runs as root, fixes permissions, then drops to ashim via gosu.
if [ "$(id -u)" = "0" ]; then
chown -R ashim:ashim /data /tmp/workspace 2>&1 || \
echo "WARNING: Could not fix volume permissions. If processing fails, check your volume mount permissions." >&2
echo "WARNING: Could not fix volume permissions. Use named volumes (not Windows bind mounts) to avoid this. See docs for details." >&2
exec gosu ashim "$@"
fi

View file

@ -356,7 +356,15 @@ export function runPythonWithProgress(
// Try persistent dispatcher first
const dispatcherPromise = dispatcherRun(scriptName, args, options);
if (dispatcherPromise) {
return dispatcherPromise;
return dispatcherPromise.catch((err: Error) => {
// Dispatcher crashed mid-request (e.g. OOM when loading a large model).
// Retry in an isolated per-request process which starts clean and has
// more available memory than the warm dispatcher.
if (err.message === "Python dispatcher exited unexpectedly") {
return runPythonPerRequest(scriptName, args, options);
}
throw err;
});
}
// Fall back to per-request spawning