LocalAI/backend/python/vllm-omni/install.sh

#!/bin/bash
# Build-time installer for the vllm-omni Python backend: sources the shared
# backend helper library, installs the base requirements and vllm, then
# installs vllm-omni itself from a source checkout.
set -e

# Python toolchain pins. They are assigned before libbackend.sh is sourced.
# NOTE(review): presumably that library reads them to choose the interpreter
# build — confirm in libbackend.sh.
PYTHON_VERSION="3.12"
PYTHON_PATCH="12"
PY_STANDALONE_TAG="20251120"

# Locate the shared helper library: prefer a common/ directory next to this
# script, otherwise fall back to ../common. Every expansion is quoted so that
# a checkout path containing spaces or glob characters still resolves to a
# single word.
backend_dir=$(dirname "$0")
if [ -d "${backend_dir}/common" ]; then
    source "${backend_dir}/common/libbackend.sh"
else
    source "${backend_dir}/../common/libbackend.sh"
fi

# Per-profile adjustments for the L4T build profiles.
#
#   l4t13  Re-asserts the Python 3.12 toolchain pins and appends
#          --index-strategy=unsafe-best-match to EXTRA_PIP_INSTALL_FLAGS.
#          The jetson-ai-lab index lists transitive deps at limited
#          versions; without that strategy uv pins to the first matching
#          index and fails to resolve a compatible wheel from PyPI.
#   l4t12  Falls back to plain pip instead of uv.
#
# Any other profile is left untouched.
case "${BUILD_PROFILE}" in
    l4t13)
        PYTHON_VERSION="3.12"
        PYTHON_PATCH="12"
        PY_STANDALONE_TAG="20251120"
        EXTRA_PIP_INSTALL_FLAGS="${EXTRA_PIP_INSTALL_FLAGS:-} --index-strategy=unsafe-best-match"
        ;;
    l4t12)
        USE_PIP=true
        ;;
esac

# Install base requirements first, before vllm and vllm-omni are added below.
# NOTE(review): installRequirements is not defined in this file — presumably
# it comes from the libbackend.sh sourced above; confirm there which
# requirement files it installs and whether it honors USE_PIP and
# EXTRA_PIP_INSTALL_FLAGS.
installRequirements

# Install vllm itself. The version and wheel source are selected from the
# build configuration; the first matching row wins:
#
#   BUILD_TYPE=hipblas        vllm 0.14.0, ROCm wheel index
#   BUILD_PROFILE=l4t13       unpinned vllm, jetson-ai-lab SBSA cu130 index
#   BUILD_PROFILE=cublas13    unpinned vllm from PyPI
#   BUILD_TYPE=cublas or ""   vllm 0.14.0 from PyPI (cuda12 / CPU)
#
# vllm-omni tracks vllm master from source (cloned below), so the cu130
# paths leave vllm unpinned — vllm 0.19+ ships cu130 wheels by default. The
# ROCm and cuda12/CPU paths keep the 0.14.0 pin. Any other build type is
# rejected with an error.
#
# The arguments are collected in two arrays so the pip/uv choice is made in
# one place:
#   vllm_install_args    package spec and options understood by pip and uv
#   vllm_uv_only_flags   options that only uv understands; they are never
#                        handed to pip, which would abort on an unknown option
vllm_install_args=()
vllm_uv_only_flags=()

if [ "x${BUILD_TYPE:-}" == "xhipblas" ]; then
    # ROCm
    vllm_install_args=(vllm==0.14.0 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700)
elif [ "x${BUILD_PROFILE:-}" == "xl4t13" ]; then
    # JetPack 7 / L4T arm64 cu130 — vllm comes from the prebuilt SBSA wheel
    # at jetson-ai-lab. Version is unpinned: the index ships whatever build
    # matches the cu130/cp312 ABI. unsafe-best-match lets uv fall through
    # to PyPI for transitive deps not present on the jetson-ai-lab index.
    vllm_install_args=(vllm --extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130)
    vllm_uv_only_flags=(--index-strategy=unsafe-best-match)
elif [ "x${BUILD_PROFILE:-}" == "xcublas13" ]; then
    # vllm 0.19+ defaults to cu130 wheels on PyPI, no extra index needed.
    vllm_install_args=(vllm)
    vllm_uv_only_flags=(--torch-backend=auto)
elif [ "x${BUILD_TYPE:-}" == "xcublas" ] || [ "x${BUILD_TYPE:-}" == "x" ]; then
    # cuda12 / CPU — keep the 0.14.0 pin for compatibility with the existing
    # cuda12 vllm-omni image; bumping should be its own change.
    vllm_install_args=(vllm==0.14.0)
    vllm_uv_only_flags=(--torch-backend=auto)
else
    echo "Unsupported build type: ${BUILD_TYPE}" >&2
    exit 1
fi

if [ "x${USE_PIP:-}" == "xtrue" ]; then
    # --torch-backend and --index-strategy are uv options; pip does not accept
    # them, so only the shared arguments are passed here.
    pip install "${vllm_install_args[@]}"
else
    # The ${arr[@]+...} form keeps an empty flag array from tripping
    # "unbound variable" on older bash when nounset is active.
    uv pip install ${vllm_uv_only_flags[@]+"${vllm_uv_only_flags[@]}"} "${vllm_install_args[@]}"
fi

# Fetch the vllm-omni sources unless a vllm-omni/ directory is already
# present (an existing directory is reused as-is and is not updated), then
# enter the checkout for the install steps that follow.
[ -d vllm-omni ] || git clone https://github.com/vllm-project/vllm-omni.git

cd vllm-omni/

# fa3-fwd ships no aarch64 wheels and there is no source distribution, so on
# aarch64 (e.g. l4t13 / SBSA cu130) the upstream requirements/cuda.txt is
# unsatisfiable. Drop it before resolving — vllm-omni does not hard-require
# the fused FA3 kernel at import time on Jetson/SBSA targets.

# strip_fa3_fwd_requirement FILE
#   Delete every fa3-fwd requirement line from FILE, in place.
#   A line is removed when, after optional leading whitespace, it names the
#   package (spelled fa3-fwd, fa3_fwd or fa3.fwd) and the name is followed by
#   end of line or by any character that cannot continue a package name
#   (version specifier, whitespace, ';' marker, '[' extras, ...). Matching on
#   the name rather than on a literal "==" keeps the strip working if
#   upstream changes the specifier. Longer names such as fa3-fwd-foo and
#   comment lines are left alone.
#   Returns sed's exit status.
strip_fa3_fwd_requirement() {
    local requirements_file=$1
    sed -i \
        -e '/^[[:space:]]*fa3[-_.]fwd[^[:alnum:]_.-]/d' \
        -e '/^[[:space:]]*fa3[-_.]fwd$/d' \
        "${requirements_file}"
}

if [ "$(uname -m)" = "aarch64" ] && [ -f requirements/cuda.txt ]; then
    strip_fa3_fwd_requirement requirements/cuda.txt
fi

# Editable install of the vllm-omni tree in the current directory.
# pip is used only when USE_PIP is exactly "true"; every other value selects
# uv. EXTRA_PIP_INSTALL_FLAGS is expanded unquoted on purpose so that it
# splits into separate command-line arguments.
case "${USE_PIP}" in
    true)
        pip install ${EXTRA_PIP_INSTALL_FLAGS:-} -e .
        ;;
    *)
        uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} -e .
        ;;
esac

# Step back out of the checkout to the parent directory.
cd ..
feat(vllm-omni): add new backend (#8188) * feat(vllm-omni: add new backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * default to py3.12 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-01-24 21:23:30 +00:00			`#!/bin/bash`
			`set -e`

			`PYTHON_VERSION="3.12"`
			`PYTHON_PATCH="12"`
			`PY_STANDALONE_TAG="20251120"`

			`backend_dir=$(dirname $0)`
			`if [ -d $backend_dir/common ]; then`
			`source $backend_dir/common/libbackend.sh`
			`else`
			`source $backend_dir/../common/libbackend.sh`
			`fi`

feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang (#9553) * feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang Adds new build profiles mirroring the diffusers/ace-step pattern so vLLM serving (and SGLang on arm64) can be deployed on CUDA 13 hosts and JetPack 7 boards: - vllm: cublas13 (PyPI cu130 channel) + l4t13 (jetson-ai-lab SBSA cu130 prebuilt vllm + flash-attn). - vllm-omni: cublas13 + l4t13. Floats vllm version on cu13 since vllm 0.19+ ships cu130 wheels by default and vllm-omni tracks vllm master; cu12 path keeps the 0.14.0 pin to avoid disturbing existing images. - sglang: l4t13 arm64 only — uses the prebuilt sglang wheel from the jetson-ai-lab SBSA cu130 index, so no source build is needed. Cublas13 sglang on x86_64 is intentionally deferred. CI matrix gains five new images (-gpu-nvidia-cuda-13-vllm{,-omni}, -nvidia-l4t-cuda-13-arm64-{vllm,vllm-omni,sglang}); backend/index.yaml gains the matching capability keys (nvidia-cuda-13, nvidia-l4t-cuda-13) and latest/development merge entries. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Write] [Bash] * fix(backends): use unsafe-best-match index strategy on l4t13 builds The jetson-ai-lab SBSA cu130 index lists transitive deps (decord, etc.) at limited versions / older Python ABIs. uv defaults to the first index that contains a package and refuses to fall through to PyPI, so sglang l4t13 build fails resolving decord. Mirror the existing cpu sglang profile by setting --index-strategy=unsafe-best-match on l4t13 across the three backends, and apply it to the explicit vllm install line in vllm-omni's install.sh (which doesn't honor EXTRA_PIP_INSTALL_FLAGS). Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] * fix(sglang): drop [all] extras on l4t13, floor version at 0.5.0 The [all] extra brings in outlines→decord, and decord has no aarch64 cp312 wheel on PyPI nor the jetson-ai-lab index (only legacy cp35-cp37 tags). 
With unsafe-best-match enabled, uv backtracked through sglang versions trying to satisfy decord and silently landed on sglang==0.1.16, an ancient version with an entirely different dep tree (cloudpickle/outlines 0.0.44, etc.). Drop [all] so decord is no longer required, and floor sglang at 0.5.0 to prevent any future resolver misfire from degrading the version again. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-04-25 10:26:29 +00:00			`# Handle l4t build profiles (Python 3.12, pip fallback) if needed.`
			`# unsafe-best-match is required on l4t13 because the jetson-ai-lab index`
			`# lists transitive deps at limited versions — without it uv pins to the`
			`# first matching index and fails to resolve a compatible wheel from PyPI.`
feat(vllm-omni): add new backend (#8188) * feat(vllm-omni: add new backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * default to py3.12 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-01-24 21:23:30 +00:00			`if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then`
			`PYTHON_VERSION="3.12"`
			`PYTHON_PATCH="12"`
			`PY_STANDALONE_TAG="20251120"`
feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang (#9553) * feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang Adds new build profiles mirroring the diffusers/ace-step pattern so vLLM serving (and SGLang on arm64) can be deployed on CUDA 13 hosts and JetPack 7 boards: - vllm: cublas13 (PyPI cu130 channel) + l4t13 (jetson-ai-lab SBSA cu130 prebuilt vllm + flash-attn). - vllm-omni: cublas13 + l4t13. Floats vllm version on cu13 since vllm 0.19+ ships cu130 wheels by default and vllm-omni tracks vllm master; cu12 path keeps the 0.14.0 pin to avoid disturbing existing images. - sglang: l4t13 arm64 only — uses the prebuilt sglang wheel from the jetson-ai-lab SBSA cu130 index, so no source build is needed. Cublas13 sglang on x86_64 is intentionally deferred. CI matrix gains five new images (-gpu-nvidia-cuda-13-vllm{,-omni}, -nvidia-l4t-cuda-13-arm64-{vllm,vllm-omni,sglang}); backend/index.yaml gains the matching capability keys (nvidia-cuda-13, nvidia-l4t-cuda-13) and latest/development merge entries. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Write] [Bash] * fix(backends): use unsafe-best-match index strategy on l4t13 builds The jetson-ai-lab SBSA cu130 index lists transitive deps (decord, etc.) at limited versions / older Python ABIs. uv defaults to the first index that contains a package and refuses to fall through to PyPI, so sglang l4t13 build fails resolving decord. Mirror the existing cpu sglang profile by setting --index-strategy=unsafe-best-match on l4t13 across the three backends, and apply it to the explicit vllm install line in vllm-omni's install.sh (which doesn't honor EXTRA_PIP_INSTALL_FLAGS). Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] * fix(sglang): drop [all] extras on l4t13, floor version at 0.5.0 The [all] extra brings in outlines→decord, and decord has no aarch64 cp312 wheel on PyPI nor the jetson-ai-lab index (only legacy cp35-cp37 tags). 
With unsafe-best-match enabled, uv backtracked through sglang versions trying to satisfy decord and silently landed on sglang==0.1.16, an ancient version with an entirely different dep tree (cloudpickle/outlines 0.0.44, etc.). Drop [all] so decord is no longer required, and floor sglang at 0.5.0 to prevent any future resolver misfire from degrading the version again. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-04-25 10:26:29 +00:00			`EXTRA_PIP_INSTALL_FLAGS="${EXTRA_PIP_INSTALL_FLAGS:-} --index-strategy=unsafe-best-match"`
feat(vllm-omni): add new backend (#8188) * feat(vllm-omni: add new backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * default to py3.12 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-01-24 21:23:30 +00:00			`fi`

			`if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then`
			`USE_PIP=true`
			`fi`

			`# Install base requirements first`
			`installRequirements`

feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang (#9553) * feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang Adds new build profiles mirroring the diffusers/ace-step pattern so vLLM serving (and SGLang on arm64) can be deployed on CUDA 13 hosts and JetPack 7 boards: - vllm: cublas13 (PyPI cu130 channel) + l4t13 (jetson-ai-lab SBSA cu130 prebuilt vllm + flash-attn). - vllm-omni: cublas13 + l4t13. Floats vllm version on cu13 since vllm 0.19+ ships cu130 wheels by default and vllm-omni tracks vllm master; cu12 path keeps the 0.14.0 pin to avoid disturbing existing images. - sglang: l4t13 arm64 only — uses the prebuilt sglang wheel from the jetson-ai-lab SBSA cu130 index, so no source build is needed. Cublas13 sglang on x86_64 is intentionally deferred. CI matrix gains five new images (-gpu-nvidia-cuda-13-vllm{,-omni}, -nvidia-l4t-cuda-13-arm64-{vllm,vllm-omni,sglang}); backend/index.yaml gains the matching capability keys (nvidia-cuda-13, nvidia-l4t-cuda-13) and latest/development merge entries. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Write] [Bash] * fix(backends): use unsafe-best-match index strategy on l4t13 builds The jetson-ai-lab SBSA cu130 index lists transitive deps (decord, etc.) at limited versions / older Python ABIs. uv defaults to the first index that contains a package and refuses to fall through to PyPI, so sglang l4t13 build fails resolving decord. Mirror the existing cpu sglang profile by setting --index-strategy=unsafe-best-match on l4t13 across the three backends, and apply it to the explicit vllm install line in vllm-omni's install.sh (which doesn't honor EXTRA_PIP_INSTALL_FLAGS). Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] * fix(sglang): drop [all] extras on l4t13, floor version at 0.5.0 The [all] extra brings in outlines→decord, and decord has no aarch64 cp312 wheel on PyPI nor the jetson-ai-lab index (only legacy cp35-cp37 tags). 
With unsafe-best-match enabled, uv backtracked through sglang versions trying to satisfy decord and silently landed on sglang==0.1.16, an ancient version with an entirely different dep tree (cloudpickle/outlines 0.0.44, etc.). Drop [all] so decord is no longer required, and floor sglang at 0.5.0 to prevent any future resolver misfire from degrading the version again. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-04-25 10:26:29 +00:00			`# Install vllm based on build type. vllm-omni tracks vllm master from`
			`# source (cloned below) so we leave the upstream vllm dependency unpinned`
			`# — vllm 0.19+ ships cu130 wheels by default, which is what we want for`
			`# cublas13. Older cuda12/rocm/cpu paths still resolve a compatible wheel`
			`# from the relevant channel.`
feat(vllm-omni): add new backend (#8188) * feat(vllm-omni: add new backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * default to py3.12 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-01-24 21:23:30 +00:00			`if [ "x${BUILD_TYPE}" == "xhipblas" ]; then`
			`# ROCm`
			`if [ "x${USE_PIP}" == "xtrue" ]; then`
			`pip install vllm==0.14.0 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700`
			`else`
			`uv pip install vllm==0.14.0 --extra-index-url https://wheels.vllm.ai/rocm/0.14.0/rocm700`
			`fi`
feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang (#9553) * feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang Adds new build profiles mirroring the diffusers/ace-step pattern so vLLM serving (and SGLang on arm64) can be deployed on CUDA 13 hosts and JetPack 7 boards: - vllm: cublas13 (PyPI cu130 channel) + l4t13 (jetson-ai-lab SBSA cu130 prebuilt vllm + flash-attn). - vllm-omni: cublas13 + l4t13. Floats vllm version on cu13 since vllm 0.19+ ships cu130 wheels by default and vllm-omni tracks vllm master; cu12 path keeps the 0.14.0 pin to avoid disturbing existing images. - sglang: l4t13 arm64 only — uses the prebuilt sglang wheel from the jetson-ai-lab SBSA cu130 index, so no source build is needed. Cublas13 sglang on x86_64 is intentionally deferred. CI matrix gains five new images (-gpu-nvidia-cuda-13-vllm{,-omni}, -nvidia-l4t-cuda-13-arm64-{vllm,vllm-omni,sglang}); backend/index.yaml gains the matching capability keys (nvidia-cuda-13, nvidia-l4t-cuda-13) and latest/development merge entries. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Write] [Bash] * fix(backends): use unsafe-best-match index strategy on l4t13 builds The jetson-ai-lab SBSA cu130 index lists transitive deps (decord, etc.) at limited versions / older Python ABIs. uv defaults to the first index that contains a package and refuses to fall through to PyPI, so sglang l4t13 build fails resolving decord. Mirror the existing cpu sglang profile by setting --index-strategy=unsafe-best-match on l4t13 across the three backends, and apply it to the explicit vllm install line in vllm-omni's install.sh (which doesn't honor EXTRA_PIP_INSTALL_FLAGS). Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] * fix(sglang): drop [all] extras on l4t13, floor version at 0.5.0 The [all] extra brings in outlines→decord, and decord has no aarch64 cp312 wheel on PyPI nor the jetson-ai-lab index (only legacy cp35-cp37 tags). 
With unsafe-best-match enabled, uv backtracked through sglang versions trying to satisfy decord and silently landed on sglang==0.1.16, an ancient version with an entirely different dep tree (cloudpickle/outlines 0.0.44, etc.). Drop [all] so decord is no longer required, and floor sglang at 0.5.0 to prevent any future resolver misfire from degrading the version again. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-04-25 10:26:29 +00:00			`elif [ "x${BUILD_PROFILE}" == "xl4t13" ]; then`
			`# JetPack 7 / L4T arm64 cu130 — vllm comes from the prebuilt SBSA wheel`
			`# at jetson-ai-lab. Version is unpinned: the index ships whatever build`
			`# matches the cu130/cp312 ABI. unsafe-best-match lets uv fall through`
			`# to PyPI for transitive deps not present on the jetson-ai-lab index.`
			`if [ "x${USE_PIP}" == "xtrue" ]; then`
			`pip install vllm --extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130`
			`else`
			`uv pip install --index-strategy=unsafe-best-match vllm --extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130`
			`fi`
			`elif [ "x${BUILD_PROFILE}" == "xcublas13" ]; then`
			`# vllm 0.19+ defaults to cu130 wheels on PyPI, no extra index needed.`
			`if [ "x${USE_PIP}" == "xtrue" ]; then`
			`pip install vllm --torch-backend=auto`
			`else`
			`uv pip install vllm --torch-backend=auto`
			`fi`
feat(vllm-omni): add new backend (#8188) * feat(vllm-omni: add new backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * default to py3.12 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-01-24 21:23:30 +00:00			`elif [ "x${BUILD_TYPE}" == "xcublas" ] \|\| [ "x${BUILD_TYPE}" == "x" ]; then`
feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang (#9553) * feat(backends): add CUDA 13 + L4T arm64 CUDA 13 variants for vllm/vllm-omni/sglang Adds new build profiles mirroring the diffusers/ace-step pattern so vLLM serving (and SGLang on arm64) can be deployed on CUDA 13 hosts and JetPack 7 boards: - vllm: cublas13 (PyPI cu130 channel) + l4t13 (jetson-ai-lab SBSA cu130 prebuilt vllm + flash-attn). - vllm-omni: cublas13 + l4t13. Floats vllm version on cu13 since vllm 0.19+ ships cu130 wheels by default and vllm-omni tracks vllm master; cu12 path keeps the 0.14.0 pin to avoid disturbing existing images. - sglang: l4t13 arm64 only — uses the prebuilt sglang wheel from the jetson-ai-lab SBSA cu130 index, so no source build is needed. Cublas13 sglang on x86_64 is intentionally deferred. CI matrix gains five new images (-gpu-nvidia-cuda-13-vllm{,-omni}, -nvidia-l4t-cuda-13-arm64-{vllm,vllm-omni,sglang}); backend/index.yaml gains the matching capability keys (nvidia-cuda-13, nvidia-l4t-cuda-13) and latest/development merge entries. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Write] [Bash] * fix(backends): use unsafe-best-match index strategy on l4t13 builds The jetson-ai-lab SBSA cu130 index lists transitive deps (decord, etc.) at limited versions / older Python ABIs. uv defaults to the first index that contains a package and refuses to fall through to PyPI, so sglang l4t13 build fails resolving decord. Mirror the existing cpu sglang profile by setting --index-strategy=unsafe-best-match on l4t13 across the three backends, and apply it to the explicit vllm install line in vllm-omni's install.sh (which doesn't honor EXTRA_PIP_INSTALL_FLAGS). Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] * fix(sglang): drop [all] extras on l4t13, floor version at 0.5.0 The [all] extra brings in outlines→decord, and decord has no aarch64 cp312 wheel on PyPI nor the jetson-ai-lab index (only legacy cp35-cp37 tags). 
With unsafe-best-match enabled, uv backtracked through sglang versions trying to satisfy decord and silently landed on sglang==0.1.16, an ancient version with an entirely different dep tree (cloudpickle/outlines 0.0.44, etc.). Drop [all] so decord is no longer required, and floor sglang at 0.5.0 to prevent any future resolver misfire from degrading the version again. Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-04-25 10:26:29 +00:00			`# cuda12 / CPU — keep the 0.14.0 pin for compatibility with the existing`
			`# cuda12 vllm-omni image; bumping should be its own change.`
feat(vllm-omni): add new backend (#8188) * feat(vllm-omni: add new backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * default to py3.12 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-01-24 21:23:30 +00:00			`if [ "x${USE_PIP}" == "xtrue" ]; then`
			`pip install vllm==0.14.0 --torch-backend=auto`
			`else`
			`uv pip install vllm==0.14.0 --torch-backend=auto`
			`fi`
			`else`
			`echo "Unsupported build type: ${BUILD_TYPE}" >&2`
			`exit 1`
			`fi`

			`# Clone and install vllm-omni from source`
			`if [ ! -d vllm-omni ]; then`
			`git clone https://github.com/vllm-project/vllm-omni.git`
			`fi`

			`cd vllm-omni/`

fix(ci): unbreak rerankers (torch bump) and vllm-omni on aarch64 (#9688) Two unrelated CI breakages bundled together since both are one-liners: - rerankers: bump torch 2.4.1 -> 2.7.1 on cpu/cublas12. The unpinned transformers resolves to 5.x, whose moe.py registers a custom_op with string-typed `'torch.Tensor'` annotations that torch 2.4.1's infer_schema rejects, blocking the gRPC server from starting and failing all 5 backend tests with "Connection refused" on :50051. Matches the version used by the transformers backend. - vllm-omni: strip fa3-fwd from the upstream requirements/cuda.txt before resolving on aarch64. fa3-fwd 0.0.3 ships only an x86_64 wheel and has no sdist, making the cuda profile unsatisfiable on Jetson/SBSA. fa3-fwd is a soft runtime dep — vllm-omni's attention backends fall back to FA2 then SDPA when it's missing. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io> 2026-05-06 15:07:24 +00:00			`# fa3-fwd ships no aarch64 wheels and there is no source distribution, so on`
			`# aarch64 (e.g. l4t13 / SBSA cu130) the upstream requirements/cuda.txt is`
			`# unsatisfiable. Drop it before resolving — vllm-omni does not hard-require`
			`# the fused FA3 kernel at import time on Jetson/SBSA targets.`
			`if [ "$(uname -m)" = "aarch64" ] && [ -f requirements/cuda.txt ]; then`
			`sed -i '/^fa3-fwd[[:space:]]*==/d' requirements/cuda.txt`
			`fi`

feat(vllm-omni): add new backend (#8188) * feat(vllm-omni: add new backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * default to py3.12 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-01-24 21:23:30 +00:00			`if [ "x${USE_PIP}" == "xtrue" ]; then`
			`pip install ${EXTRA_PIP_INSTALL_FLAGS:-} -e .`
			`else`
			`uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} -e .`
			`fi`

			`cd ..`