feat(rocm): bump to 7.x (#9323)

feat(rocm): bump to 7.2.1

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2026-04-12 08:51:30 +02:00 committed by GitHub
parent 2865f0f8d3
commit 151ad271f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 114 additions and 99 deletions

View file

@ -28,7 +28,7 @@ Add build matrix entries for each platform/GPU type you want to support. Look at
- CUDA 13 builds: Add after other CUDA 13 builds (e.g., after `gpu-nvidia-cuda-13-chatterbox`) - CUDA 13 builds: Add after other CUDA 13 builds (e.g., after `gpu-nvidia-cuda-13-chatterbox`)
**Additional build types you may need:** **Additional build types you may need:**
- ROCm/HIP: Use `build-type: 'hipblas'` with `base-image: "rocm/dev-ubuntu-24.04:6.4.4"` - ROCm/HIP: Use `build-type: 'hipblas'` with `base-image: "rocm/dev-ubuntu-24.04:7.2.1"`
- Intel/SYCL: Use `build-type: 'intel'` or `build-type: 'sycl_f16'`/`sycl_f32` with `base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"` - Intel/SYCL: Use `build-type: 'intel'` or `build-type: 'sycl_f16'`/`sycl_f32` with `base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"`
- L4T (ARM): Use `build-type: 'l4t'` with `platforms: 'linux/arm64'` and `runs-on: 'ubuntu-24.04-arm'` - L4T (ARM): Use `build-type: 'l4t'` with `platforms: 'linux/arm64'` and `runs-on: 'ubuntu-24.04-arm'`

View file

@ -10,7 +10,7 @@ Let's say the user wants to build a particular backend for a given platform. For
- At a minimum we need to set the BUILD_TYPE, BASE_IMAGE build-args - At a minimum we need to set the BUILD_TYPE, BASE_IMAGE build-args
- Use .github/workflows/backend.yml as a reference it lists the needed args in the `include` job strategy matrix - Use .github/workflows/backend.yml as a reference it lists the needed args in the `include` job strategy matrix
- l4t and cublas also require the CUDA major and minor version - l4t and cublas also require the CUDA major and minor version
- You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:6.4.4 make docker-build-coqui` - You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:7.2.1 make docker-build-coqui`
- Unless the user specifies that they want you to run the command, then just print it because not all agent frontends handle long running jobs well and the output may overflow your context - Unless the user specifies that they want you to run the command, then just print it because not all agent frontends handle long running jobs well and the output may overflow your context
- The user may say they want to build AMD or ROCM instead of hipblas, or Intel instead of SYCL or NVIDIA instead of l4t or cublas. Ask for confirmation if there is ambiguity. - The user may say they want to build AMD or ROCM instead of hipblas, or Intel instead of SYCL or NVIDIA instead of l4t or cublas. Ask for confirmation if there is ambiguity.
- Sometimes the user may need extra parameters to be added to `docker build` (e.g. `--platform` for cross-platform builds or `--progress` to view the full logs), in which case you can generate the `docker build` command directly. - Sometimes the user may need extra parameters to be added to `docker build` (e.g. `--platform` for cross-platform builds or `--progress` to view the full logs), in which case you can generate the `docker build` command directly.

View file

@ -1298,7 +1298,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-rerankers' tag-suffix: '-gpu-rocm-hipblas-rerankers'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "rerankers" backend: "rerankers"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1311,7 +1311,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-llama-cpp' tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "llama-cpp" backend: "llama-cpp"
dockerfile: "./backend/Dockerfile.llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp"
@ -1324,7 +1324,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-vllm' tag-suffix: '-gpu-rocm-hipblas-vllm'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "vllm" backend: "vllm"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1337,7 +1337,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-vllm-omni' tag-suffix: '-gpu-rocm-hipblas-vllm-omni'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "vllm-omni" backend: "vllm-omni"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1350,7 +1350,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-transformers' tag-suffix: '-gpu-rocm-hipblas-transformers'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "transformers" backend: "transformers"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1363,7 +1363,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-diffusers' tag-suffix: '-gpu-rocm-hipblas-diffusers'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "diffusers" backend: "diffusers"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1376,7 +1376,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-ace-step' tag-suffix: '-gpu-rocm-hipblas-ace-step'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "ace-step" backend: "ace-step"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1390,7 +1390,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-kokoro' tag-suffix: '-gpu-rocm-hipblas-kokoro'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "kokoro" backend: "kokoro"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1403,7 +1403,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-vibevoice' tag-suffix: '-gpu-rocm-hipblas-vibevoice'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "vibevoice" backend: "vibevoice"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1416,7 +1416,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-qwen-asr' tag-suffix: '-gpu-rocm-hipblas-qwen-asr'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "qwen-asr" backend: "qwen-asr"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1429,7 +1429,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-nemo' tag-suffix: '-gpu-rocm-hipblas-nemo'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "nemo" backend: "nemo"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1442,7 +1442,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-qwen-tts' tag-suffix: '-gpu-rocm-hipblas-qwen-tts'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "qwen-tts" backend: "qwen-tts"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1455,7 +1455,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-fish-speech' tag-suffix: '-gpu-rocm-hipblas-fish-speech'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "fish-speech" backend: "fish-speech"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1468,7 +1468,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-voxcpm' tag-suffix: '-gpu-rocm-hipblas-voxcpm'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "voxcpm" backend: "voxcpm"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1481,7 +1481,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-pocket-tts' tag-suffix: '-gpu-rocm-hipblas-pocket-tts'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "pocket-tts" backend: "pocket-tts"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1494,7 +1494,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-faster-whisper' tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
runs-on: 'bigger-runner' runs-on: 'bigger-runner'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "faster-whisper" backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1507,7 +1507,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-whisperx' tag-suffix: '-gpu-rocm-hipblas-whisperx'
runs-on: 'bigger-runner' runs-on: 'bigger-runner'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisperx" backend: "whisperx"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -1520,7 +1520,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-coqui' tag-suffix: '-gpu-rocm-hipblas-coqui'
runs-on: 'bigger-runner' runs-on: 'bigger-runner'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "coqui" backend: "coqui"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
@ -2175,7 +2175,7 @@ jobs:
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-whisper' tag-suffix: '-gpu-rocm-hipblas-whisper'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisper" backend: "whisper"
@ -2254,7 +2254,7 @@ jobs:
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-acestep-cpp' tag-suffix: '-gpu-rocm-hipblas-acestep-cpp'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
skip-drivers: 'false' skip-drivers: 'false'
backend: "acestep-cpp" backend: "acestep-cpp"
@ -2456,7 +2456,7 @@ jobs:
# platforms: 'linux/amd64' # platforms: 'linux/amd64'
# tag-latest: 'auto' # tag-latest: 'auto'
# tag-suffix: '-gpu-hipblas-rfdetr' # tag-suffix: '-gpu-hipblas-rfdetr'
# base-image: "rocm/dev-ubuntu-24.04:6.4.4" # base-image: "rocm/dev-ubuntu-24.04:7.2.1"
# runs-on: 'ubuntu-latest' # runs-on: 'ubuntu-latest'
# skip-drivers: 'false' # skip-drivers: 'false'
# backend: "rfdetr" # backend: "rfdetr"
@ -2497,7 +2497,7 @@ jobs:
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-neutts' tag-suffix: '-gpu-rocm-hipblas-neutts'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
skip-drivers: 'false' skip-drivers: 'false'
backend: "neutts" backend: "neutts"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"

View file

@ -59,7 +59,7 @@
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-hipblas' tag-suffix: '-hipblas'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
grpc-base-image: "ubuntu:24.04" grpc-base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"

View file

@ -41,7 +41,7 @@
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-hipblas' tag-suffix: '-gpu-hipblas'
base-image: "rocm/dev-ubuntu-24.04:6.4.4" base-image: "rocm/dev-ubuntu-24.04:7.2.1"
grpc-base-image: "ubuntu:24.04" grpc-base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"

View file

@ -209,7 +209,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
rm -rf /var/lib/apt/lists/* && \ rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \ ldconfig && \
# Log which GPU architectures have rocBLAS kernel support
echo "rocBLAS library data architectures:" && \
(ls /opt/rocm*/lib/rocblas/library/Kernels* 2>/dev/null || ls /opt/rocm*/lib64/rocblas/library/Kernels* 2>/dev/null) | grep -oP 'gfx[0-9a-z+-]+' | sort -u || \
echo "WARNING: No rocBLAS kernel data found" \
; fi ; fi
RUN echo "TARGETARCH: $TARGETARCH" RUN echo "TARGETARCH: $TARGETARCH"

View file

@ -33,7 +33,7 @@ else ifeq ($(BUILD_TYPE),hipblas)
ROCM_PATH ?= /opt/rocm ROCM_PATH ?= /opt/rocm
export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang export CC=$(ROCM_HOME)/llvm/bin/clang
AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
else ifeq ($(BUILD_TYPE),vulkan) else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1 CMAKE_ARGS+=-DGGML_VULKAN=1

View file

@ -46,6 +46,10 @@ if [ "$(uname)" == "Darwin" ]; then
#export DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH #export DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
else else
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
# Tell rocBLAS where to find TensileLibrary data (GPU kernel tuning files)
if [ -d "$CURDIR/lib/rocblas/library" ]; then
export ROCBLAS_TENSILE_LIBPATH=$CURDIR/lib/rocblas/library
fi
fi fi
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View file

@ -26,7 +26,7 @@ else ifeq ($(BUILD_TYPE),hipblas)
ROCM_PATH ?= /opt/rocm ROCM_PATH ?= /opt/rocm
export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang export CC=$(ROCM_HOME)/llvm/bin/clang
AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
else ifeq ($(BUILD_TYPE),vulkan) else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=ON CMAKE_ARGS+=-DGGML_VULKAN=ON

View file

@ -32,7 +32,7 @@ else ifeq ($(BUILD_TYPE),hipblas)
ROCM_PATH ?= /opt/rocm ROCM_PATH ?= /opt/rocm
export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang export CC=$(ROCM_HOME)/llvm/bin/clang
AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
else ifeq ($(BUILD_TYPE),vulkan) else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON

View file

@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
torchaudio torchaudio
torchvision torchvision

View file

@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.9.1+rocm6.4 torch==2.10.0+rocm7.0
torchaudio==2.9.1+rocm6.4 torchaudio==2.10.0+rocm7.0
transformers transformers
numpy>=1.24.0,<1.26.0 numpy>=1.24.0,<1.26.0
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289

View file

@ -1,2 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch torch

View file

@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
torchaudio==2.8.0+rocm6.4 torchaudio==2.10.0+rocm7.0
transformers==4.48.3 transformers==4.48.3
accelerate accelerate
coqui-tts coqui-tts

View file

@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
torchvision==0.23.0+rocm6.4 torchvision==0.25.0+rocm7.0
git+https://github.com/huggingface/diffusers git+https://github.com/huggingface/diffusers
opencv-python opencv-python
transformers transformers

View file

@ -1,3 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch torch
faster-whisper faster-whisper

View file

@ -1,3 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.7.1+rocm6.3 torch==2.10.0+rocm7.0
torchaudio==2.7.1+rocm6.3 torchaudio==2.10.0+rocm7.0

View file

@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
torchaudio==2.8.0+rocm6.4 torchaudio==2.10.0+rocm7.0
transformers transformers
accelerate accelerate
kokoro kokoro

View file

@ -1,3 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch torch
nemo_toolkit[asr] nemo_toolkit[asr]

View file

@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
transformers==4.56.1 transformers==4.56.1
accelerate accelerate
librosa==0.11.0 librosa==0.11.0

View file

@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
accelerate accelerate
llvmlite==0.43.0 llvmlite==0.43.0
numba==0.60.0 numba==0.60.0

View file

@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3 --extra-index-url https://download.pytorch.org/whl/rocm7.0
pocket-tts pocket-tts
scipy scipy
torch==2.7.1+rocm6.3 torch==2.10.0+rocm7.0

View file

@ -1,3 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.7.1+rocm6.3 torch==2.10.0+rocm7.0
qwen-asr qwen-asr

View file

@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.7.1+rocm6.3 torch==2.10.0+rocm7.0
torchaudio==2.7.1+rocm6.3 torchaudio==2.10.0+rocm7.0
qwen-tts qwen-tts
sox sox

View file

@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
transformers transformers
accelerate accelerate
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
rerankers[transformers] rerankers[transformers]

View file

@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
torchvision==0.23.0+rocm6.4 torchvision==0.25.0+rocm7.0
rfdetr rfdetr
opencv-python opencv-python
accelerate accelerate

View file

@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0+rocm6.4 torch==2.10.0+rocm7.0
accelerate accelerate
transformers>=5.0.0 transformers>=5.0.0
llvmlite==0.43.0 llvmlite==0.43.0

View file

@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.7.1+rocm6.3 torch==2.10.0+rocm7.0
torchvision==0.22.1+rocm6.3 torchvision==0.25.0+rocm7.0
git+https://github.com/huggingface/diffusers git+https://github.com/huggingface/diffusers
opencv-python opencv-python
transformers>=4.51.3,<5.0.0 transformers>=4.51.3,<5.0.0

View file

@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/nightly/rocm6.4 --extra-index-url https://download.pytorch.org/whl/nightly/rocm7.0
accelerate accelerate
torch torch
transformers transformers

View file

@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/nightly/rocm6.4 --extra-index-url https://download.pytorch.org/whl/nightly/rocm7.0
accelerate accelerate
torch torch
transformers transformers

View file

@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.7.1+rocm6.3 torch==2.10.0+rocm7.0
soundfile soundfile
numpy numpy
voxcpm voxcpm

View file

@ -1,3 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4 --extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.8.0 torch==2.10.0+rocm7.0
whisperx @ git+https://github.com/m-bain/whisperX.git whisperx @ git+https://github.com/m-bain/whisperX.git

View file

@ -151,15 +151,15 @@ llama_init_from_file: kv self size = 512.00 MB
## ROCM(AMD) acceleration ## ROCM(AMD) acceleration
There are a limited number of tested configurations for ROCm systems however most newer deditated GPU consumer grade devices seem to be supported under the current ROCm6 implementation. There are a limited number of tested configurations for ROCm systems however most newer dedicated GPU consumer grade devices seem to be supported under the current ROCm 7 implementation.
Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "installation/build#Acceleration" %}}) documentation. Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "installation/build#Acceleration" %}}) documentation.
### Requirements ### Requirements
- `ROCm 6.x.x` compatible GPU/accelerator - `ROCm 7.x.x` compatible GPU/accelerator
- OS: `Ubuntu` (22.04, 20.04), `RHEL` (9.3, 9.2, 8.9, 8.8), `SLES` (15.5, 15.4) - OS: `Ubuntu` (24.04, 22.04), `RHEL` (9.x), `SLES` (15.x)
- Installed to host: `amdgpu-dkms` and `rocm` >=6.0.0 as per ROCm documentation. - Installed to host: `amdgpu-dkms` and `rocm` >=7.0.0 as per ROCm documentation.
### Recommendations ### Recommendations
@ -171,24 +171,23 @@ Due to the nature of ROCm it is best to run all implementations in containers as
Ongoing verification testing of ROCm compatibility with integrated backends. Ongoing verification testing of ROCm compatibility with integrated backends.
Please note the following list of verified backends and devices. Please note the following list of verified backends and devices.
LocalAI hipblas images are built against the following targets: gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 LocalAI hipblas images are built against the following targets: gfx908, gfx90a, gfx942, gfx950, gfx1030, gfx1100, gfx1101, gfx1102, gfx1200, gfx1201
If your device is not one of these you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. Otherwise you don't need to specify these in the commands below. **Note:** Starting with ROCm 6.4, AMD removed rocBLAS kernel support for older architectures (gfx803, gfx900, gfx906). Since llama.cpp and other backends depend on rocBLAS for matrix operations, these GPUs (e.g. Radeon VII) are no longer supported in pre-built images.
If your device is not one of the above targets, you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. However, rebuilding will not help for architectures that lack rocBLAS kernel support in your ROCm version.
### Verified ### Verified
The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0` The devices in the following list have been tested with `hipblas` images.
| Backend | Verified | Devices | | Backend | Verified | Devices |
| ---- | ---- | ---- | | ---- | ---- | ---- |
| llama.cpp | yes | Radeon VII (gfx906) | | llama.cpp | yes | MI100 (gfx908), MI210/250 (gfx90a) |
| diffusers | yes | Radeon VII (gfx906) | | diffusers | yes | MI100 (gfx908), MI210/250 (gfx90a) |
| piper | yes | Radeon VII (gfx906) |
| whisper | no | none | | whisper | no | none |
| coqui | no | none | | coqui | no | none |
| transformers | no | none | | transformers | no | none |
| sentencetransformers | no | none |
| transformers-musicgen | no | none |
| vllm | no | none | | vllm | no | none |
**You can help by expanding this list.** **You can help by expanding this list.**
@ -196,8 +195,8 @@ The devices in the following list have been tested with `hipblas` images running
### System Prep ### System Prep
1. Check your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html). 1. Check your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatibility for ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)) 2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the [ROCm compatibility matrix](https://rocm.docs.amd.com/en/latest/compatibility/compatibility-matrix.html).
3. Install you chosen version of the `dkms` and `rocm` (it is recommended that the native package manager be used for this process for any OS as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`, for details regarding this see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html)) 3. Install your chosen version of the `dkms` and `rocm` (it is recommended that the native package manager be used for this process for any OS as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`, for details regarding this see the [ROCm installation documentation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html).
4. Deploy. Yes it's that easy. 4. Deploy. Yes it's that easy.
#### Setup Example (Docker/containerd) #### Setup Example (Docker/containerd)
@ -212,7 +211,7 @@ The following are examples of the ROCm specific configuration elements required.
# If your gpu is not already included in the current list of default targets the following build details are required. # If your gpu is not already included in the current list of default targets the following build details are required.
- REBUILD=true - REBUILD=true
- BUILD_TYPE=hipblas - BUILD_TYPE=hipblas
- GPU_TARGETS=gfx906 # Example for Radeon VII - GPU_TARGETS=gfx1100 # Example for RX 7900 XTX
devices: devices:
# AMD GPU only require the following devices be passed through to the container for offloading to occur. # AMD GPU only require the following devices be passed through to the container for offloading to occur.
- /dev/dri - /dev/dri
@ -226,7 +225,7 @@ docker run \
-e DEBUG=true \ -e DEBUG=true \
-e REBUILD=true \ -e REBUILD=true \
-e BUILD_TYPE=hipblas \ -e BUILD_TYPE=hipblas \
-e GPU_TARGETS=gfx906 \ -e GPU_TARGETS=gfx1100 \
--device /dev/dri \ --device /dev/dri \
--device /dev/kfd \ --device /dev/kfd \
quay.io/go-skynet/local-ai:master-gpu-hipblas quay.io/go-skynet/local-ai:master-gpu-hipblas

View file

@ -163,7 +163,7 @@ rocminfo
docker run --device=/dev/kfd --device=/dev/dri --group-add=video ... docker run --device=/dev/kfd --device=/dev/dri --group-add=video ...
``` ```
If your GPU is not in the default target list, open up an Issue. Supported targets include: gfx900, gfx906, gfx908, gfx90a, gfx940, gfx941, gfx942, gfx1030, gfx1031, gfx1100, gfx1101. If your GPU is not in the default target list, open up an Issue. Supported targets include: gfx908, gfx90a, gfx942, gfx950, gfx1030, gfx1100, gfx1101, gfx1102, gfx1200, gfx1201.
**Intel (SYCL):** **Intel (SYCL):**

View file

@ -198,17 +198,25 @@ package_rocm_libs() {
fi fi
done done
# Copy rocblas library data (tuning files, etc.) # Copy rocblas library data (tuning files, TensileLibrary, etc.)
local old_nullglob=$(shopt -p nullglob) local old_nullglob=$(shopt -p nullglob)
shopt -s nullglob shopt -s nullglob
local rocm_dirs=(/opt/rocm /opt/rocm-*) local rocm_dirs=(/opt/rocm /opt/rocm-*)
eval "$old_nullglob" eval "$old_nullglob"
local rocblas_found=false
for rocm_base in "${rocm_dirs[@]}"; do for rocm_base in "${rocm_dirs[@]}"; do
if [ -d "$rocm_base/lib/rocblas" ]; then for lib_subdir in lib lib64; do
mkdir -p "$TARGET_LIB_DIR/rocblas" if [ -d "$rocm_base/$lib_subdir/rocblas" ]; then
cp -arfL "$rocm_base/lib/rocblas/"* "$TARGET_LIB_DIR/rocblas/" 2>/dev/null || true echo "Found rocblas data at $rocm_base/$lib_subdir/rocblas"
fi mkdir -p "$TARGET_LIB_DIR/rocblas"
cp -arfL "$rocm_base/$lib_subdir/rocblas/"* "$TARGET_LIB_DIR/rocblas/" || echo "WARNING: Failed to copy rocblas data from $rocm_base/$lib_subdir/rocblas"
rocblas_found=true
fi
done
done done
if [ "$rocblas_found" = false ]; then
echo "WARNING: No rocblas library data found in /opt/rocm*/lib{,64}/rocblas"
fi
# Copy libomp from LLVM (required for ROCm) # Copy libomp from LLVM (required for ROCm)
shopt -s nullglob shopt -s nullglob