From ce59dd9625ae0fc81c6ad98ff570d34bf055abf3 Mon Sep 17 00:00:00 2001 From: Tim B <79199034+timbrinded@users.noreply.github.com> Date: Tue, 13 May 2025 10:12:32 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20=F0=9F=90=B3=20Improve=20Docker=20Cachin?= =?UTF-8?q?g=20(#66)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary This PR attempts to improve caching, and thus speed, for Docker image generation. It aims to dramatically reduce image build times by using: - cargo chef - cache mounts - sccache - cache dance ## Context As a result, changes that do not affect the code should (in theory) not trigger a new build. Changes that do touch the Rust code should also, in theory, build faster, as the dependencies are unlikely to have changed and so their cached build can be reused. In fact, some part of the process should always be reusable unless we do something drastic like changing the rust-toolchain, and even then it should only be a one-time cost to regenerate that part of the cache.
--------- Co-authored-by: Facundo Farall <37149322+ffarall@users.noreply.github.com> --- .../actions/cleanup-runner/action.yml | 56 +++++++++++-- .github/workflows/task-docker.yml | 81 ++++++++++++++----- .github/workflows/task-rust-lint.yml | 2 + .github/workflows/task-rust-tests.yml | 1 + README.md | 26 +++++- operator/.dockerignore | 52 ++++++++++++ operator/Dockerfile | 78 +++++++++++------- test/package.json | 2 +- 8 files changed, 237 insertions(+), 61 deletions(-) create mode 100644 operator/.dockerignore diff --git a/.github/workflows/actions/cleanup-runner/action.yml b/.github/workflows/actions/cleanup-runner/action.yml index 9be342f9..706dcee9 100644 --- a/.github/workflows/actions/cleanup-runner/action.yml +++ b/.github/workflows/actions/cleanup-runner/action.yml @@ -4,27 +4,67 @@ description: "Cleans up the runner environment to free up disk space" runs: using: "composite" steps: - - name: Install duf (optional pretty output) - run: sudo apt-get update -qq && sudo apt-get install -y -qq duf - shell: bash - - run: duf + - name: Show disk space and largest directories before cleanup shell: bash + run: | + echo "Overall disk space before cleanup (df -h /):" + df -h / + echo "-------------------------------------------" + echo "Top-level directories before cleanup (du -h --max-depth=1 /):" + sudo du -h --max-depth=1 --exclude=/proc --exclude=/sys --exclude=/dev / | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /usr/ before cleanup:" + sudo du -h --max-depth=1 /usr/ | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /usr/local/lib before cleanup:" + sudo du -h --max-depth=1 /usr/local/lib | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /usr/lib before cleanup:" + sudo du -h --max-depth=1 /usr/lib | sort -rh | head -n 10 + echo "-------------------------------------------" + echo
"Detailed breakdown for /opt before cleanup:" + sudo du -h --max-depth=1 /opt | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /opt/hostedtoolcache before cleanup:" + sudo du -h --max-depth=1 /opt/hostedtoolcache | sort -rh | head -n 10 - name: Free up disk space id: prune shell: bash run: | - echo "Space before:" ; df -h / + echo "--- Starting Cleanup ---" sudo rm -rf \ /usr/local/lib/android \ /usr/local/lib/heroku \ /usr/share/dotnet \ + /usr/lib/google-cloud-sdk \ + /usr/lib/jvm \ /opt/hostedtoolcache/CodeQL \ /opt/hostedtoolcache/go \ /opt/hostedtoolcache/Python \ /opt/hostedtoolcache/Ruby \ /opt/hostedtoolcache/Java sudo apt-get clean - echo "Space after:" ; df -h / - - run: duf + echo "--- Cleanup Finished ---" + - name: Show disk space and largest directories after cleanup shell: bash - \ No newline at end of file + run: | + echo "Overall disk space after cleanup (df -h /):" + df -h / + echo "-------------------------------------------" + echo "Top-level directories after cleanup (du -h --max-depth=1 /):" + sudo du -h --max-depth=1 --exclude=/proc --exclude=/sys --exclude=/dev / | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /usr/ after cleanup:" + sudo du -h --max-depth=1 /usr/ | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /usr/local/lib after cleanup:" + sudo du -h --max-depth=1 /usr/local/lib | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /usr/lib after cleanup:" + sudo du -h --max-depth=1 /usr/lib | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /opt after cleanup:" + sudo du -h --max-depth=1 /opt | sort -rh | head -n 10 + echo "-------------------------------------------" + echo "Detailed breakdown for /opt/hostedtoolcache after cleanup:" + sudo du -h
--max-depth=1 /opt/hostedtoolcache | sort -rh | head -n 10 diff --git a/.github/workflows/task-docker.yml b/.github/workflows/task-docker.yml index 0f974354..0a217e7e 100644 --- a/.github/workflows/task-docker.yml +++ b/.github/workflows/task-docker.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 - + - uses: ./.github/workflows/actions/cleanup-runner - name: Docker meta @@ -37,6 +37,7 @@ jobs: tags: | type=sha,format=short,prefix=sha- type=ref,event=tag + type=ref,event=branch type=ref,event=pr - name: Extract last tag for job output @@ -48,6 +49,7 @@ jobs: run: | echo "Generated tags: ${{ steps.meta.outputs.tags }}" echo "Generated labels: ${{ steps.meta.outputs.labels }}" + echo "Generated JSON: ${{ steps.meta.outputs.json }}" - uses: docker/setup-qemu-action@v3 - uses: docker/setup-buildx-action@v3 @@ -55,20 +57,44 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build Docker image (load for tests) + - name: Cache Mount blobs + uses: actions/cache@v4 + id: cache + with: + path: | + cargo-registry + cargo-git + sccache + key: cache-mount-${{ hashFiles('./operator/Dockerfile', './operator/Cargo.lock') }} + restore-keys: | + cache-mount-${{ hashFiles('./operator/Dockerfile') }} + cache-mount- + - name: Inject cache into docker + uses: reproducible-containers/buildkit-cache-dance@v3.1.0 + with: + cache-map: | + { + "cargo-registry": { "target": "/usr/local/cargo/registry" }, + "cargo-git": { "target": "/usr/local/cargo/git" }, + "sccache": { "target": "/usr/local/sccache" } + } + skip-extraction: ${{ steps.cache.outputs.cache-hit }} + - name: Build Docker image (load for tests) with detailed cache logging id: build uses: docker/build-push-action@v5 with: - context: ./operator + context: ./ file: ./operator/Dockerfile load: true push: false tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - build-args: | - BUILDKIT_INLINE_CACHE=1 - 
cache-from: type=gha,scope=polkadot-build - cache-to: type=gha,mode=max,scope=polkadot-build + cache-from: type=gha,scope=datahaven-build + cache-to: type=gha,mode=max,scope=datahaven-build + - name: Log build cache statistics + run: | + echo "Build cache statistics:" + docker buildx du --verbose # --- Smoke tests --- @@ -79,25 +105,44 @@ jobs: run: | docker run --rm -d -p 9944:9944 --name local-dh-node \ ${{ steps.last_tag_extractor.outputs.last_tag_value }} --dev --unsafe-rpc-external - - run: sleep 60 - - run: docker logs local-dh-node --tail 100 - - run: | - curl --fail --location 'http://127.0.0.1:9944' \ - --header 'Content-Type: application/json' \ - --data '{"jsonrpc":"2.0","id":1,"method":"system_chain","params":[]}' - - run: docker rm -f local-dh-node + - name: Wait for node to be healthy and test + run: | + echo "Waiting for node to start..." + for i in {1..30}; do # Retry for 30 * 5s = 150 seconds + if curl --fail --location 'http://127.0.0.1:9944' \ + --header 'Content-Type: application/json' \ + --data '{"jsonrpc":"2.0","id":1,"method":"system_chain","params":[]}' ; then + echo "Node is healthy!" + docker logs local-dh-node --tail 100 + exit 0 + fi + echo "Attempt $i: Node not ready yet, sleeping 5s..." + sleep 5 + done + echo "Node failed to start or respond in time." 
+ docker logs local-dh-node --tail 100 + exit 1 + + - name: Cleanup integration test container + if: always() + run: docker rm -f local-dh-node # --- Push to Docker Hub --- - - name: Push Docker image + - name: Push Docker image with detailed cache logging uses: docker/build-push-action@v5 with: - context: ./operator + context: ./ file: ./operator/Dockerfile push: true load: false tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha,scope=polkadot-build - cache-to: type=gha,mode=max,scope=polkadot-build + cache-from: type=gha,scope=datahaven-build + cache-to: type=gha,mode=max,scope=datahaven-build provenance: mode=max sbom: true + + - name: Log final build cache statistics + run: | + echo "Final build cache statistics:" + docker buildx du --verbose diff --git a/.github/workflows/task-rust-lint.yml b/.github/workflows/task-rust-lint.yml index d3ec8724..af7fbf65 100644 --- a/.github/workflows/task-rust-lint.yml +++ b/.github/workflows/task-rust-lint.yml @@ -18,6 +18,8 @@ env: CARGO_TERM_COLOR: always WORKING_DIR: operator RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=mold" + SCCACHE_GHA_ENABLED: "true" + RUSTC_WRAPPER: "sccache" jobs: cargo-fmt: diff --git a/.github/workflows/task-rust-tests.yml b/.github/workflows/task-rust-tests.yml index 4d9eb59d..cfbe7fb5 100644 --- a/.github/workflows/task-rust-tests.yml +++ b/.github/workflows/task-rust-tests.yml @@ -22,6 +22,7 @@ jobs: CARGO_INCREMENTAL: "0" CARGO_TERM_COLOR: always RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=mold" + SCCACHE_GHA_ENABLED: "true" defaults: run: working-directory: ./operator diff --git a/README.md b/README.md index e0687845..e0228a51 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,33 @@ An EVM compatible Substrate chain, powered by StorageHub and secured by EigenLay datahaven/ ├── .github/ # GitHub Actions workflows. 
├── contracts/ # Implementation of the DataHaven AVS (Autonomous Verifiable Service) smart contracts to interact with EigenLayer. -├── docker/ # Docker build files. ├── operator/ # DataHaven node based on Substrate. The "Operator" in EigenLayer terms. ├── test/ # Integration tests for the AVS and Operator. ├── resources/ # Miscellaneous resources for the DataHaven project. └── README.md ``` +## Docker + +This repo publishes images to [DockerHub](https://hub.docker.com/r/moonsonglabs/datahaven). + +> [!TIP] +> +> If you cannot see this repo you must be added to the permission list for the private repo. + +To aid with speed it employs the following: + +- [sccache](https://github.com/mozilla/sccache/tree/main): De-facto caching tool to speed up rust builds. +- [cargo chef](https://lpalmieri.com/posts/fast-rust-docker-builds/): A method of caching building the dependencies as a docker layer to cut down compilation times. +- [buildx cache mounts](https://docs.docker.com/build/cache/optimize/#use-cache-mounts): Using buildx's new feature to mount an externally restored cache into a container. +- [cache dance](https://github.com/reproducible-containers/buildkit-cache-dance): Weird workaround (endorsed by docker themselves) to inject caches into containers and return the result back to the CI. + +To run a docker image locally (`moonsonglabs/datahaven:local`), from the `/test` folder run: + +```sh +bun build:docker:operator +``` + ## CI Using the [act](https://github.com/nektos/act) binary, you can run GitHub Actions locally. 
@@ -22,7 +42,7 @@ Using the [act](https://github.com/nektos/act) binary, you can run GitHub Action For example, to run the entire `e2e` workflow: ```bash -act -W .github/workflows/e2e.yml -s GITHUB_TOKEN="$(gh auth token)" +act -W .github/workflows/e2e.yml -s GITHUB_TOKEN="$(gh auth token)" ``` Which results in: @@ -45,4 +65,4 @@ INFO[0000] Start server on http://192.168.1.97:34567 [E2E - Kurtosis Deploy and Verify/kurtosis] Cleaning up container for job kurtosis [E2E - Kurtosis Deploy and Verify/kurtosis] ✅ Success - Complete job [E2E - Kurtosis Deploy and Verify/kurtosis] 🏁 Job succeeded -``` \ No newline at end of file +``` diff --git a/operator/.dockerignore b/operator/.dockerignore new file mode 100644 index 00000000..dda79733 --- /dev/null +++ b/operator/.dockerignore @@ -0,0 +1,52 @@ +# Editor/IDE specific +**/.idea/ +**/.vscode/ +**/.DS_Store +**/*.swp +**/*.swo +**/*.bak +**/*~ + +# Temporary/cache files +**/*.tmp +**/*.log +**/*.profraw +**/*.profdata +**/.rustc_info.json +**/__pycache__/ +**/*.py[cod] +**/*.o +**/*.a + +# CI/CD +.travis.yml +.circleci/ +.gitlab-ci.yml +.github/ + +# Documentation +docs/ +**/doc/ +**/*.md +!README.md +!LICENSE + +# Tests +**/tests/ +**/test/ +**/*.test +**/*.spec.js + +# Other project directories and files not needed for build +.ropeproject/ +resources/ +contracts/ +timbo.log +scripts/ +examples/ +.editorconfig +.prettierrc +.eslintrc +Cargo.lock.old +*.toml.old +*.lock.old \ No newline at end of file diff --git a/operator/Dockerfile b/operator/Dockerfile index 7764e610..ef25a170 100644 --- a/operator/Dockerfile +++ b/operator/Dockerfile @@ -1,49 +1,65 @@ -# --- Analyze dependencies --- +# --- Setup Build Environment --- +FROM docker.io/paritytech/ci-unified:bullseye-1.85.0 AS base -FROM docker.io/paritytech/ci-unified:bullseye-1.85.0 AS chef +ARG MOLD_VERSION=2.39.0 +ARG SCCACHE_VERSION=0.10.0 +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + xz-utils \ + clang \ + && echo "Installing mold 
v${MOLD_VERSION}..." \ + && curl -Lo mold.tar.gz "https://github.com/rui314/mold/releases/download/v${MOLD_VERSION}/mold-${MOLD_VERSION}-x86_64-linux.tar.gz" \ + && tar -xf mold.tar.gz --strip-components=1 -C /usr/local \ + && rm mold.tar.gz \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && echo "Installing sccache v${SCCACHE_VERSION}..." \ + && curl -Lo sccache.tar.gz "https://github.com/mozilla/sccache/releases/download/v${SCCACHE_VERSION}/sccache-v${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" \ + && tar -xf sccache.tar.gz --strip-components=1 -C /usr/local/bin sccache-v${SCCACHE_VERSION}-x86_64-unknown-linux-musl/sccache \ + && rm sccache.tar.gz +RUN cargo install cargo-chef --version 0.1.71 --locked + +ENV RUSTC_WRAPPER=sccache \ + SCCACHE_DIR=/usr/local/sccache \ + SCCACHE_CACHE_SIZE=25G \ + RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=/usr/local/bin/mold" + +# --- Prepare build plan with cargo-chef --- +FROM base AS planner WORKDIR /datahaven - -RUN cargo install cargo-chef --locked -q - -COPY . . +COPY ./operator/ . RUN cargo chef prepare --recipe-path recipe.json - -# --- Build dependencies --- -FROM docker.io/paritytech/ci-unified:bullseye-1.85.0 AS builder - +# --- Build dependencies using cargo-chef --- +FROM base AS builder WORKDIR /datahaven +COPY --from=planner /datahaven/recipe.json recipe.json +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/usr/local/sccache,sharing=locked \ + cargo chef cook --recipe-path recipe.json --release +COPY ./operator/ . 
+RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/usr/local/sccache,sharing=locked \ + cargo build --locked --release -RUN cargo install cargo-chef --locked -q -COPY --from=chef /datahaven/recipe.json recipe.json -RUN cargo chef cook --recipe-path recipe.json --release - -# Copy the full workspace *after* the dep cache is warm. -COPY . . - -RUN cargo build --locked --release --bin datahaven-node - - -# --- Prepare Runtime Environment --- +# --- Create final lightweight runtime image --- FROM docker.io/parity/base-bin:latest - COPY --from=builder /datahaven/target/release/datahaven-node /usr/local/bin -# --- Create minimal, non‑root runtime user --- USER root RUN useradd -m -u 1001 -U -s /bin/sh -d /datahaven datahaven && \ - mkdir -p /data /datahaven/.local/share && \ - chown -R datahaven:datahaven /data && \ - ln -s /data /datahaven/.local/share/datahaven && \ - # unclutter and minimise the attack surface - rm -rf /usr/bin /usr/sbin && \ - # sanity check that the binary is runnable inside this image - /usr/local/bin/datahaven-node --version + mkdir -p /data /datahaven/.local/share && \ + chown -R datahaven:datahaven /data && \ + ln -s /data /datahaven/.local/share/datahaven && \ + rm -rf /usr/bin /usr/sbin && \ + /usr/local/bin/datahaven-node --version USER datahaven EXPOSE 30333 9933 9944 9615 VOLUME ["/data"] -ENTRYPOINT ["/usr/local/bin/datahaven-node"] +ENTRYPOINT ["/usr/local/bin/datahaven-node"] \ No newline at end of file diff --git a/test/package.json b/test/package.json index 7704c4e4..9b51431b 100644 --- a/test/package.json +++ b/test/package.json @@ -7,7 +7,7 @@ "cli": "bun run cli/index.ts", "fmt": "biome check .", "fmt:fix": "biome check --write .", - "build:docker:operator": "docker build -t moonsonglabs/datahaven:local -f ../operator/Dockerfile ../operator", + "build:docker:operator": "docker buildx build -t moonsonglabs/datahaven-node:local -f 
../operator/Dockerfile ../.", "build:docker:relayer": "bun -e \"import build from './scripts/snowbridge-relayer.ts'; build()\"", "generate:wagmi": "wagmi generate", "generate:snowbridge-cfgs": "bun -e \"import {generateSnowbridgeConfigs} from './scripts/gen-snowbridge-cfgs.ts'; await generateSnowbridgeConfigs()\"",