fix: 🐳 Improve Docker Caching (#66)

## Summary

This PR attempts to improve caching, and thus build speed, for Docker image
generation.

The goal is to dramatically reduce image build times by using:

- cargo chef 
- cache mounts
- sccache
- cache dance

## Context

As a result, changes that do not affect the code should (in theory) not
trigger a new build.

Builds for changes that do touch the Rust code should also, in theory, be
shorter: the dependencies are unlikely to have changed, so their cached
build can be reused. In fact, some part of the process should always be
reusable unless we do something drastic like changing the rust-toolchain,
and even then regenerating that part of the cache should only be a
one-time cost.

---------

Co-authored-by: Facundo Farall <37149322+ffarall@users.noreply.github.com>
This commit is contained in:
Tim B 2025-05-13 10:12:32 +01:00 committed by GitHub
parent 728c320926
commit ce59dd9625
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 237 additions and 61 deletions

View file

@ -4,27 +4,67 @@ description: "Cleans up the runner environment to free up disk space"
runs:
using: "composite"
steps:
- name: Install duf (optional pretty output)
run: sudo apt-get update -qq && sudo apt-get install -y -qq duf
shell: bash
- run: duf
- name: Show disk space and largest directories before cleanup
shell: bash
run: |
echo "Overall disk space before cleanup (df -h /):"
df -h /
echo "-------------------------------------------"
echo "Top-level directories before cleanup (du -h --max-depth=1 /):"
sudo du -h --max-depth=1 --exclude=/proc --exclude=/sys --exclude=/dev / | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /usr/ before cleanup:"
sudo du -h --max-depth=1 /usr/ | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /usr/local/lib before cleanup:"
sudo du -h --max-depth=1 /usr/local/lib | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /usr/lib before cleanup:"
sudo du -h --max-depth=1 /usr/lib | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /opt before cleanup:"
sudo du -h --max-depth=1 /opt | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /opt/hostedtoolcache before cleanup:"
sudo du -h --max-depth=1 /opt/hostedtoolcache | sort -rh | head -n 10
- name: Free up disk space
id: prune
shell: bash
run: |
echo "Space before:" ; df -h /
echo "--- Starting Cleanup ---"
sudo rm -rf \
/usr/local/lib/android \
/usr/local/lib/heroku \
/usr/share/dotnet \
/usr/lib/google-cloud-sdk \
/usr/lib/jvm \
/opt/hostedtoolcache/CodeQL \
/opt/hostedtoolcache/go \
/opt/hostedtoolcache/Python \
/opt/hostedtoolcache/Ruby \
/opt/hostedtoolcache/Java
sudo apt-get clean
echo "Space after:" ; df -h /
- run: duf
echo "--- Cleanup Finished ---"
- name: Show disk space and largest directories after cleanup
shell: bash
run: |
echo "Overall disk space after cleanup (df -h /):"
df -h /
echo "-------------------------------------------"
echo "Top-level directories after cleanup (du -h --max-depth=1 /):"
sudo du -h --max-depth=1 --exclude=/proc --exclude=/sys --exclude=/dev / | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /usr/ before cleanup:"
sudo du -h --max-depth=1 /usr/ | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /usr/local/lib after cleanup:"
sudo du -h --max-depth=1 /usr/local/lib | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /usr/lib after cleanup:"
sudo du -h --max-depth=1 /usr/lib | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /opt after cleanup:"
sudo du -h --max-depth=1 /opt | sort -rh | head -n 10
echo "-------------------------------------------"
echo "Detailed breakdown for /opt/hostedtoolcache after cleanup:"
sudo du -h --max-depth=1 /opt/hostedtoolcache | sort -rh | head -n 10

View file

@ -24,7 +24,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: ./.github/workflows/actions/cleanup-runner
- name: Docker meta
@ -37,6 +37,7 @@ jobs:
tags: |
type=sha,format=short,prefix=sha-
type=ref,event=tag
type=ref,event=branch
type=ref,event=pr
- name: Extract last tag for job output
@ -48,6 +49,7 @@ jobs:
run: |
echo "Generated tags: ${{ steps.meta.outputs.tags }}"
echo "Generated labels: ${{ steps.meta.outputs.labels }}"
echo "Generated JSON: ${{ steps.meta.outputs.json }}"
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3
@ -55,20 +57,44 @@ jobs:
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build Docker image (load for tests)
- name: Cache Mount blobs
uses: actions/cache@v4
id: cache
with:
path: |
cargo-registry
cargo-git
sccache
key: cache-mount-${{ hashFiles('./operator/Dockerfile', './operator/Cargo.lock') }}
restore-keys: |
cache-mount-${{ hashFiles('./operator/Dockerfile') }}
cache-mount-
- name: Inject cache into docker
uses: reproducible-containers/buildkit-cache-dance@v3.1.0
with:
cache-map: |
{
"cargo-registry": { "target": "/usr/local/cargo/registry" },
"cargo-git": { "target": "/usr/local/cargo/git" },
"sccache": { "target": "/usr/local/sccache" }
}
skip-extraction: ${{ steps.cache.outputs.cache-hit }}
- name: Build Docker image (load for tests) with detailed cache logging
id: build
uses: docker/build-push-action@v5
with:
context: ./operator
context: ./
file: ./operator/Dockerfile
load: true
push: false
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
BUILDKIT_INLINE_CACHE=1
cache-from: type=gha,scope=polkadot-build
cache-to: type=gha,mode=max,scope=polkadot-build
cache-from: type=gha,scope=datahaven-build
cache-to: type=gha,mode=max,scope=datahaven-build
- name: Log build cache statistics
run: |
echo "Build cache statistics:"
docker buildx du --verbose
# --- Smoke tests ---
@ -79,25 +105,44 @@ jobs:
run: |
docker run --rm -d -p 9944:9944 --name local-dh-node \
${{ steps.last_tag_extractor.outputs.last_tag_value }} --dev --unsafe-rpc-external
- run: sleep 60
- run: docker logs local-dh-node --tail 100
- run: |
curl --fail --location 'http://127.0.0.1:9944' \
--header 'Content-Type: application/json' \
--data '{"jsonrpc":"2.0","id":1,"method":"system_chain","params":[]}'
- run: docker rm -f local-dh-node
- name: Wait for node to be healthy and test
run: |
echo "Waiting for node to start..."
for i in {1..30}; do # Retry for 30 * 5s = 150 seconds
if curl --fail --location 'http://127.0.0.1:9944' \
--header 'Content-Type: application/json' \
--data '{"jsonrpc":"2.0","id":1,"method":"system_chain","params":[]}' ; then
echo "Node is healthy!"
docker logs local-dh-node --tail 100
exit 0
fi
echo "Attempt $i: Node not ready yet, sleeping 5s..."
sleep 5
done
echo "Node failed to start or respond in time."
docker logs local-dh-node --tail 100
exit 1
- name: Cleanup integration test container
if: always()
run: docker rm -f local-dh-node
# --- Push to Docker Hub ---
- name: Push Docker image
- name: Push Docker image with detailed cache logging
uses: docker/build-push-action@v5
with:
context: ./operator
context: ./
file: ./operator/Dockerfile
push: true
load: false
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha,scope=polkadot-build
cache-to: type=gha,mode=max,scope=polkadot-build
cache-from: type=gha,scope=datahaven-build
cache-to: type=gha,mode=max,scope=datahaven-build
provenance: mode=max
sbom: true
- name: Log final build cache statistics
run: |
echo "Final build cache statistics:"
docker buildx du --verbose

View file

@ -18,6 +18,8 @@ env:
CARGO_TERM_COLOR: always
WORKING_DIR: operator
RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=mold"
SCCACHE_GHA_ENABLED: "true"
RUSTC_WRAPPER: "sccache"
jobs:
cargo-fmt:

View file

@ -22,6 +22,7 @@ jobs:
CARGO_INCREMENTAL: "0"
CARGO_TERM_COLOR: always
RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=mold"
SCCACHE_GHA_ENABLED: "true"
defaults:
run:
working-directory: ./operator

View file

@ -8,13 +8,33 @@ An EVM compatible Substrate chain, powered by StorageHub and secured by EigenLay
datahaven/
├── .github/ # GitHub Actions workflows.
├── contracts/ # Implementation of the DataHaven AVS (Autonomous Verifiable Service) smart contracts to interact with EigenLayer.
├── docker/ # Docker build files.
├── operator/ # DataHaven node based on Substrate. The "Operator" in EigenLayer terms.
├── test/ # Integration tests for the AVS and Operator.
├── resources/ # Miscellaneous resources for the DataHaven project.
└── README.md
```
## Docker
This repo publishes images to [DockerHub](https://hub.docker.com/r/moonsonglabs/datahaven).
> [!TIP]
>
> If you cannot see this repo you must be added to the permission list for the private repo.
To aid with speed it employs the following:
- [sccache](https://github.com/mozilla/sccache/tree/main): De-facto caching tool to speed up rust builds.
- [cargo chef](https://lpalmieri.com/posts/fast-rust-docker-builds/): A method of caching building the dependencies as a docker layer to cut down compilation times.
- [buildx cache mounts](https://docs.docker.com/build/cache/optimize/#use-cache-mounts): Using buildx's new feature to mount an externally restored cache into a container.
- [cache dance](https://github.com/reproducible-containers/buildkit-cache-dance): Weird workaround (endorsed by docker themselves) to inject caches into containers and return the result back to the CI.
To build the Docker image locally (`moonsonglabs/datahaven-node:local`), run the following from the `/test` folder:
```sh
bun build:docker:operator
```
## CI
Using the [act](https://github.com/nektos/act) binary, you can run GitHub Actions locally.
@ -22,7 +42,7 @@ Using the [act](https://github.com/nektos/act) binary, you can run GitHub Action
For example, to run the entire `e2e` workflow:
```bash
act -W .github/workflows/e2e.yml -s GITHUB_TOKEN="$(gh auth token)"
act -W .github/workflows/e2e.yml -s GITHUB_TOKEN="$(gh auth token)"
```
Which results in:
@ -45,4 +65,4 @@ INFO[0000] Start server on http://192.168.1.97:34567
[E2E - Kurtosis Deploy and Verify/kurtosis] Cleaning up container for job kurtosis
[E2E - Kurtosis Deploy and Verify/kurtosis] ✅ Success - Complete job
[E2E - Kurtosis Deploy and Verify/kurtosis] 🏁 Job succeeded
```
```

52
operator/.dockerignore Normal file
View file

@ -0,0 +1,52 @@
# Editor/IDE specific
**/.idea/
**/.vscode/
**/.DS_Store
**/*.swp
**/*.swo
**/*.bak
**/*~
# Temporary/cache files
**/*.tmp
**/*.log
**/*.profraw
**/*.profdata
**/.rustc_info.json
**/__pycache__/
**/*.py[cod]
**/*.o
**/*.a
# CI/CD
.travis.yml
.circleci/
.gitlab-ci.yml
.github/
# Documentation
docs/
**/doc/
**/*.md
!README.md
!LICENSE
# Tests
**/tests/
**/test/
**/*.test
**/*.spec.js
# Other project directories and files not needed for build
.ropeproject/
resources/
contracts/
timbo.log
scripts/
examples/
.editorconfig
.prettierrc
.eslintrc
Cargo.lock.old
*.toml.old
*.lock.old

View file

@ -1,49 +1,65 @@
# --- Analyze dependencies ---
# --- Setup Build Environment ---
FROM docker.io/paritytech/ci-unified:bullseye-1.85.0 AS base
FROM docker.io/paritytech/ci-unified:bullseye-1.85.0 AS chef
ARG MOLD_VERSION=2.39.0
ARG SCCACHE_VERSION=0.10.0
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
xz-utils \
clang \
&& echo "Installing mold v${MOLD_VERSION}..." \
&& curl -Lo mold.tar.gz "https://github.com/rui314/mold/releases/download/v${MOLD_VERSION}/mold-${MOLD_VERSION}-x86_64-linux.tar.gz" \
&& tar -xf mold.tar.gz --strip-components=1 -C /usr/local \
&& rm mold.tar.gz \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
&& echo "Installing sccache v${SCCACHE_VERSION}..." \
&& curl -Lo sccache.tar.gz "https://github.com/mozilla/sccache/releases/download/v${SCCACHE_VERSION}/sccache-v${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" \
&& tar -xf sccache.tar.gz --strip-components=1 -C /usr/local/bin sccache-v${SCCACHE_VERSION}-x86_64-unknown-linux-musl/sccache \
&& rm sccache.tar.gz
RUN cargo install cargo-chef --version 0.1.71 --locked
ENV RUSTC_WRAPPER=sccache \
SCCACHE_DIR=/usr/local/sccache \
SCCACHE_CACHE_SIZE=25G \
RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=/usr/local/bin/mold"
# --- Prepare build plan with cargo-chef ---
FROM base AS planner
WORKDIR /datahaven
RUN cargo install cargo-chef --locked -q
COPY . .
COPY ./operator/ .
RUN cargo chef prepare --recipe-path recipe.json
# --- Build dependencies ---
FROM docker.io/paritytech/ci-unified:bullseye-1.85.0 AS builder
# --- Build dependencies using cargo-chef ---
FROM base AS builder
WORKDIR /datahaven
COPY --from=planner /datahaven/recipe.json recipe.json
RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \
--mount=type=cache,target=/usr/local/sccache,sharing=locked \
cargo chef cook --recipe-path recipe.json --release
COPY ./operator/ .
RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \
--mount=type=cache,target=/usr/local/sccache,sharing=locked \
cargo build --locked --release
RUN cargo install cargo-chef --locked -q
COPY --from=chef /datahaven/recipe.json recipe.json
RUN cargo chef cook --recipe-path recipe.json --release
# Copy the full workspace *after* the dep cache is warm.
COPY . .
RUN cargo build --locked --release --bin datahaven-node
# --- Prepare Runtime Environment ---
# --- Create final lightweight runtime image ---
FROM docker.io/parity/base-bin:latest
COPY --from=builder /datahaven/target/release/datahaven-node /usr/local/bin
# --- Create minimal, nonroot runtime user ---
USER root
RUN useradd -m -u 1001 -U -s /bin/sh -d /datahaven datahaven && \
mkdir -p /data /datahaven/.local/share && \
chown -R datahaven:datahaven /data && \
ln -s /data /datahaven/.local/share/datahaven && \
# unclutter and minimise the attack surface
rm -rf /usr/bin /usr/sbin && \
# sanity check that the binary is runnable inside this image
/usr/local/bin/datahaven-node --version
mkdir -p /data /datahaven/.local/share && \
chown -R datahaven:datahaven /data && \
ln -s /data /datahaven/.local/share/datahaven && \
rm -rf /usr/bin /usr/sbin && \
/usr/local/bin/datahaven-node --version
USER datahaven
EXPOSE 30333 9933 9944 9615
VOLUME ["/data"]
ENTRYPOINT ["/usr/local/bin/datahaven-node"]
ENTRYPOINT ["/usr/local/bin/datahaven-node"]

View file

@ -7,7 +7,7 @@
"cli": "bun run cli/index.ts",
"fmt": "biome check .",
"fmt:fix": "biome check --write .",
"build:docker:operator": "docker build -t moonsonglabs/datahaven:local -f ../operator/Dockerfile ../operator",
"build:docker:operator": "docker buildx build -t moonsonglabs/datahaven-node:local -f ../operator/Dockerfile ../.",
"build:docker:relayer": "bun -e \"import build from './scripts/snowbridge-relayer.ts'; build()\"",
"generate:wagmi": "wagmi generate",
"generate:snowbridge-cfgs": "bun -e \"import {generateSnowbridgeConfigs} from './scripts/gen-snowbridge-cfgs.ts'; await generateSnowbridgeConfigs()\"",