feat: ⚠️ reduce images size and stop bundling sources (#5721)

feat: reduce images size and stop bundling sources

Do not copy sources anymore, and reduce packages of the base images by
not using builder images.

If you need to rebuild, build the container image from scratch by
following the docs. We will slowly try to migrate all backends to the
gallery to keep the core small.

This PR is a breaking change: it also sets the base folders to /models
and /backends instead of /build/models and /build/backends.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
Ettore Di Giacinto 2025-06-26 18:41:38 +02:00 committed by GitHub
parent 7c4a2e9b85
commit 6644af10c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 123 additions and 157 deletions

View file

@ -2,82 +2,17 @@ ARG BASE_IMAGE=ubuntu:22.04
ARG GRPC_BASE_IMAGE=${BASE_IMAGE} ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
ARG INTEL_BASE_IMAGE=${BASE_IMAGE} ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements FROM ${BASE_IMAGE} AS requirements
USER root
ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
build-essential \ ca-certificates curl wget espeak-ng libgomp1 \
ccache \ python3 python-is-python3 ffmpeg && \
ca-certificates espeak-ng \
curl libssl-dev \
git \
git-lfs \
unzip upx-ucl python3 python-is-python3 && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers and rice
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
go install github.com/GeertJohan/go.rice/rice@latest
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
WORKDIR /build
###################################
###################################
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here. # The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
FROM requirements AS requirements-drivers FROM requirements AS requirements-drivers
@ -85,7 +20,8 @@ ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12 ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0 ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false ARG SKIP_DRIVERS=false
ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE} ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements # Vulkan requirements
@ -152,6 +88,83 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
ldconfig \ ldconfig \
; fi ; fi
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
###################################
###################################
# The build-requirements target layers the build toolchain (C/C++ compilers, CMake, Go and
# the Go-based protoc/rice tools) on top of requirements-drivers. Tooling that is only needed
# to compile belongs here rather than in the requirements/requirements-drivers targets.
FROM requirements-drivers AS build-requirements

# Toolchain versions; override with --build-arg.
ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
# Set to "true" to compile CMake ${CMAKE_VERSION} from the upstream tarball instead of
# installing the distribution package.
ARG CMAKE_FROM_SOURCE=false
# Re-declared so the values are visible to the RUN instructions of this stage.
ARG TARGETARCH
ARG TARGETVARIANT

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ccache \
        ca-certificates espeak-ng \
        curl libssl-dev \
        git \
        git-lfs \
        unzip upx-ucl python3 python-is-python3 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install CMake (the version in 22.04 is too old)
# NOTE: the comparison previously read "${CMAKE_FROM_SOURCE}}"; the stray brace made the
# expanded value "true}" so the from-source branch could never be selected.
RUN <<EOT bash
    if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
    else
        apt-get update && \
        apt-get install -y \
            cmake && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/*
    fi
EOT

# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin

# Install grpc compilers and rice
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
    go install github.com/GeertJohan/go.rice/rice@latest

# Trust any user-supplied CA certificates placed in custom-ca-certs/
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates

# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libopenblas-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Only prints a warning when TARGETARCH is empty; the echo succeeds, so the build is not aborted here.
RUN test -n "$TARGETARCH" \
    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')

# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"

WORKDIR /build
################################### ###################################
################################### ###################################
@ -218,13 +231,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer # The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
FROM requirements-drivers AS builder-base FROM build-requirements AS builder-base
ARG GO_TAGS="tts p2p" ARG GO_TAGS="tts p2p"
ARG GRPC_BACKENDS ARG GRPC_BACKENDS
ARG MAKEFLAGS ARG MAKEFLAGS
ARG LD_FLAGS="-s -w" ARG LD_FLAGS="-s -w"
ARG TARGETARCH
ARG TARGETVARIANT
ENV GRPC_BACKENDS=${GRPC_BACKENDS} ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS} ENV GO_TAGS=${GO_TAGS}
ENV MAKEFLAGS=${MAKEFLAGS} ENV MAKEFLAGS=${MAKEFLAGS}
@ -259,6 +273,8 @@ EOT
# Compile backends first in a separate stage # Compile backends first in a separate stage
FROM builder-base AS builder-backends FROM builder-base AS builder-backends
ARG TARGETARCH
ARG TARGETVARIANT
COPY --from=grpc /opt/grpc /usr/local COPY --from=grpc /opt/grpc /usr/local
@ -314,24 +330,13 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
FROM builder-base AS devcontainer FROM builder-base AS devcontainer
ARG FFMPEG
COPY --from=grpc /opt/grpc /usr/local COPY --from=grpc /opt/grpc /usr/local
COPY .devcontainer-scripts /.devcontainer-scripts COPY .devcontainer-scripts /.devcontainer-scripts
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
ssh less wget ssh less
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime. # For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
RUN go install github.com/go-delve/delve/cmd/dlv@latest RUN go install github.com/go-delve/delve/cmd/dlv@latest
@ -345,40 +350,16 @@ RUN go install github.com/mikefarah/yq/v4@latest
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it. # If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers FROM requirements-drivers
ARG FFMPEG
ARG BUILD_TYPE
ARG TARGETARCH
ARG MAKEFLAGS
ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}
ARG CUDA_MAJOR_VERSION=12 ARG CUDA_MAJOR_VERSION=12
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all ENV NVIDIA_VISIBLE_DEVICES=all
# Add FFmpeg WORKDIR /
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
WORKDIR /build COPY ./entrypoint.sh .
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
# so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
# https://github.com/go-skynet/LocalAI/pull/434
COPY . .
COPY --from=builder /build/sources ./sources/
COPY --from=grpc /opt/grpc /usr/local
# Copy the binary # Copy the binary
COPY --from=builder /build/local-ai ./ COPY --from=builder /build/local-ai ./
@ -387,12 +368,12 @@ COPY --from=builder /build/local-ai ./
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/ COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# Make sure the models directory exists # Make sure the models directory exists
RUN mkdir -p /build/models /build/backends RUN mkdir -p /models /backends
# Define the health check command # Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1 CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
VOLUME /build/models /build/backends VOLUME /models /backends
EXPOSE 8080 EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ] ENTRYPOINT [ "/entrypoint.sh" ]

View file

@ -135,4 +135,4 @@ check_vars
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS" echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
exec /build/entrypoint.sh "$@" exec /entrypoint.sh "$@"

View file

@ -268,7 +268,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example: To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
```bash ```bash
docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2 docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
``` ```
### Notes ### Notes
@ -296,7 +296,7 @@ To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}
To run LocalAI with Docker and Vulkan, you can use the following command as an example: To run LocalAI with Docker and Vulkan, you can use the following command as an example:
```bash ```bash
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models localai/localai:latest-vulkan-ffmpeg-core
``` ```
### Notes ### Notes
@ -308,7 +308,7 @@ These flags are the same as the sections above, depending on the hardware, for [
If you have mixed hardware, you can pass flags for multiple GPUs, for example: If you have mixed hardware, you can pass flags for multiple GPUs, for example:
```bash ```bash
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \ docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models \
--gpus=all \ # nvidia passthrough --gpus=all \ # nvidia passthrough
--device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough --device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
localai/localai:latest-vulkan-ffmpeg-core localai/localai:latest-vulkan-ffmpeg-core

View file

@ -92,7 +92,7 @@ services:
- DEBUG=true - DEBUG=true
# ... # ...
volumes: volumes:
- ./models:/build/models:cached - ./models:/models:cached
# decomment the following piece if running with Nvidia GPUs # decomment the following piece if running with Nvidia GPUs
# deploy: # deploy:
# resources: # resources:
@ -105,21 +105,21 @@ services:
{{% alert icon="💡" %}} {{% alert icon="💡" %}}
**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/build/models` inside the container. The AIO models will be automatically updated with new versions of AIO images. **Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.
You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`). You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`).
If you want to use a named model or a local directory, you can mount it as a volume to `/build/models`: If you want to use a named model or a local directory, you can mount it as a volume to `/models`:
```bash ```bash
docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai/localai:latest-aio-cpu docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/models localai/localai:latest-aio-cpu
``` ```
or associate a volume: or associate a volume:
```bash ```bash
docker volume create localai-models docker volume create localai-models
docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu
``` ```
{{% /alert %}} {{% /alert %}}

View file

@ -35,7 +35,7 @@ docker pull quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models: Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:
```bash ```bash
docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core docker run -e DEBUG=true -p 8080:8080 -v /data/models:/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
``` ```
Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models. Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.

View file

@ -663,7 +663,7 @@ install_docker() {
IMAGE_TAG=${LOCALAI_VERSION}-vulkan IMAGE_TAG=${LOCALAI_VERSION}-vulkan
info "Starting LocalAI Docker container..." info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \ $SUDO docker run -v local-ai-data:/models \
--device /dev/dri \ --device /dev/dri \
--restart=always \ --restart=always \
-e API_KEY=$API_KEY \ -e API_KEY=$API_KEY \
@ -690,7 +690,7 @@ install_docker() {
fi fi
info "Starting LocalAI Docker container..." info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \ $SUDO docker run -v local-ai-data:/models \
--gpus all \ --gpus all \
--restart=always \ --restart=always \
-e API_KEY=$API_KEY \ -e API_KEY=$API_KEY \
@ -705,7 +705,7 @@ install_docker() {
fi fi
info "Starting LocalAI Docker container..." info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \ $SUDO docker run -v local-ai-data:/models \
--device /dev/dri \ --device /dev/dri \
--device /dev/kfd \ --device /dev/kfd \
--group-add=video \ --group-add=video \
@ -723,7 +723,7 @@ install_docker() {
fi fi
info "Starting LocalAI Docker container..." info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/build/models \ $SUDO docker run -v local-ai-data:/models \
--device /dev/dri \ --device /dev/dri \
--restart=always \ --restart=always \
-e API_KEY=$API_KEY \ -e API_KEY=$API_KEY \

View file

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
set -e set -e
cd /build cd /
# If we have set EXTRA_BACKENDS, then we need to prepare the backends # If we have set EXTRA_BACKENDS, then we need to prepare the backends
if [ -n "$EXTRA_BACKENDS" ]; then if [ -n "$EXTRA_BACKENDS" ]; then
@ -13,38 +13,23 @@ if [ -n "$EXTRA_BACKENDS" ]; then
done done
fi fi
if [ "$REBUILD" != "false" ]; then echo "CPU info:"
rm -rf ./local-ai grep -e "model\sname" /proc/cpuinfo | head -1
make build -j${BUILD_PARALLELISM:-1} grep -e "flags" /proc/cpuinfo | head -1
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
else else
echo "@@@@@" echo "CPU: no AVX found"
echo "Skipping rebuild" fi
echo "@@@@@" if grep -q -e "\savx2\s" /proc/cpuinfo ; then
echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true" echo "CPU: AVX2 found OK"
echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:" else
echo 'CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF"' echo "CPU: no AVX2 found"
echo "see the documentation at: https://localai.io/basics/build/index.html" fi
echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288" if grep -q -e "\savx512" /proc/cpuinfo ; then
echo "@@@@@" echo "CPU: AVX512 found OK"
echo "CPU info:" else
grep -e "model\sname" /proc/cpuinfo | head -1 echo "CPU: no AVX512 found"
grep -e "flags" /proc/cpuinfo | head -1
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
else
echo "CPU: no AVX found"
fi
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
echo "CPU: AVX2 found OK"
else
echo "CPU: no AVX2 found"
fi
if grep -q -e "\savx512" /proc/cpuinfo ; then
echo "CPU: AVX512 found OK"
else
echo "CPU: no AVX512 found"
fi
echo "@@@@@"
fi fi
exec ./local-ai "$@" exec ./local-ai "$@"