From 5e13193d8493c5bef8f2dc14926d2691559381f6 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 28 Feb 2026 08:42:53 +0100
Subject: [PATCH] docs: add CDI driver config for NVIDIA GPU in containers (fix #8108) (#8677)

This addresses issue #8108, where the legacy nvidia driver configuration
causes container startup failures with newer NVIDIA Container Toolkit
versions.

Changes:
- Update the docker-compose example to show both the CDI (recommended)
  and legacy nvidia driver options
- Add a troubleshooting section for the 'Auto-detected mode as legacy' error
- Document the fix for nvidia-container-cli 'invalid expression' errors

The root cause is a Docker/NVIDIA Container Toolkit configuration issue,
not a LocalAI code bug: the error occurs in the container runtime's
prestart hook, before LocalAI starts.

Co-authored-by: localai-bot
---
 docker-compose.yaml                     | 18 +++++
 docs/content/installation/containers.md | 90 ++++++++++++++++++++-----
 2 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 9523457bc..ee0b3293a 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -27,6 +27,24 @@ services:
       # or an URL pointing to a YAML configuration file, for example:
       # - https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
       - phi-2
+    # For NVIDIA GPU support with CDI (recommended for NVIDIA Container Toolkit 1.14+):
+    # Uncomment the following deploy section to request GPUs through CDI
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: cdi
+    #           device_ids:
+    #             - nvidia.com/gpu=all
+    #
+    # For the legacy NVIDIA driver (for older NVIDIA Container Toolkit versions):
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: 1
+    #           capabilities: [gpu]
 
 volumes:
   models:
diff --git a/docs/content/installation/containers.md b/docs/content/installation/containers.md
index 4dfd36fdc..ae61ee7ac 100644
--- a/docs/content/installation/containers.md
+++ b/docs/content/installation/containers.md
@@ -139,17 +139,16 @@ podman run -ti --name local-ai -p 8080:8080 --device gpu.intel.com/all localai/l
 
 For a more manageable setup, especially with persistent volumes, use Docker Compose or Podman Compose:
 
+### Using CDI (Container Device Interface) - Recommended for NVIDIA Container Toolkit 1.14+
+
+The CDI approach is recommended for newer versions of the NVIDIA Container Toolkit (1.14 and later). It provides better compatibility with current container runtimes and is the future-proof method:
+
 ```yaml
 version: "3.9"
 services:
   api:
-    image: localai/localai:latest-aio-cpu
-    # For GPU support, use one of:
-    # image: localai/localai:latest-aio-gpu-nvidia-cuda-13
-    # image: localai/localai:latest-aio-gpu-nvidia-cuda-12
-    # image: localai/localai:latest-aio-gpu-nvidia-cuda-11
-    # image: localai/localai:latest-aio-gpu-hipblas
-    # image: localai/localai:latest-aio-gpu-intel
+    image: localai/localai:latest-aio-gpu-nvidia-cuda-12
+    # For CUDA 13, use: localai/localai:latest-aio-gpu-nvidia-cuda-13
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
       interval: 1m
@@ -161,14 +160,15 @@ services:
       - DEBUG=false
     volumes:
       - ./models:/models:cached
-    # For NVIDIA GPUs, uncomment:
-    # deploy:
-    #   resources:
-    #     reservations:
-    #       devices:
-    #         - driver: nvidia
-    #           count: 1
-    #           capabilities: [gpu]
+    # CDI driver configuration (recommended for NVIDIA Container Toolkit 1.14+)
+    # This requests all GPUs through the nvidia.com/gpu CDI device kind
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: cdi
+              device_ids:
+                - nvidia.com/gpu=all
 ```
 
 Save this as `compose.yaml` and run:
@@ -179,6 +179,37 @@
 docker compose up -d
 podman-compose up -d
 ```
+### Using Legacy NVIDIA Driver - For Older NVIDIA Container Toolkit
+
+If you are using an older version of the NVIDIA Container Toolkit (before 1.14), or need backward compatibility, use the legacy approach:
+
+```yaml
+version: "3.9"
+services:
+  api:
+    image: localai/localai:latest-aio-gpu-nvidia-cuda-12
+    # For CUDA 13, use: localai/localai:latest-aio-gpu-nvidia-cuda-13
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
+      interval: 1m
+      timeout: 20m
+      retries: 5
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=false
+    volumes:
+      - ./models:/models:cached
+    # Legacy NVIDIA driver configuration (for older NVIDIA Container Toolkit)
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+```
+
 ## Persistent Storage
 
 To persist models and configurations, mount a volume:
@@ -244,6 +275,35 @@ After installation:
 
 - For NVIDIA: Install [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
 - For AMD: Ensure devices are accessible: `ls -la /dev/kfd /dev/dri`
+### NVIDIA container fails to start with "Auto-detected mode as 'legacy'" error
+
+If you encounter this error:
+```
+Error response from daemon: failed to create task for container: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: error running prestart hook #0: exit status 1, stdout: , stderr: Auto-detected mode as 'legacy'
+nvidia-container-cli: requirement error: invalid expression
+```
+
+This indicates a Docker/NVIDIA Container Toolkit configuration issue: the container runtime's prestart hook fails before LocalAI starts. It is **not** a LocalAI code bug.
+
+**Solutions:**
+
+1. **Use CDI mode (recommended)**: Update your docker-compose.yaml to use the CDI driver configuration:
+   ```yaml
+   deploy:
+     resources:
+       reservations:
+         devices:
+           - driver: cdi
+             device_ids:
+               - nvidia.com/gpu=all
+   ```
+
+2. **Upgrade the NVIDIA Container Toolkit**: Ensure you have version 1.14 or later, which has better CDI support.
+
+3. **Check the NVIDIA Container Toolkit installation**: Run `nvidia-container-cli info` to verify your installation is working correctly outside of containers.
+
+4. **Verify Docker GPU access**: Test with `docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi`
+
 ### Models not downloading
 
 - Check internet connection
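
Note for reviewers: CDI mode additionally requires a CDI specification for the GPUs to exist on the host, which the patch does not cover. A minimal host-setup sketch, assuming the NVIDIA Container Toolkit's `nvidia-ctk` CLI is installed and using its documented default spec path (not verifiable in CI, since it needs an NVIDIA GPU and a running Docker daemon):

```bash
# Generate the CDI specification describing the host's NVIDIA GPUs
# (writes /etc/cdi/nvidia.yaml; requires root)
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml

# List the CDI device names the runtime can now resolve,
# e.g. nvidia.com/gpu=0 and nvidia.com/gpu=all
nvidia-ctk cdi list

# Smoke-test CDI injection directly, without compose
docker run --rm --device nvidia.com/gpu=all ubuntu nvidia-smi
```

If `nvidia-ctk cdi list` shows no devices, the compose `deploy` reservation above will fail regardless of driver configuration, so this is worth checking before the troubleshooting steps in the patch.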