datahaven/.github/workflows/DOCKER-PROD.yml
Steve Degosserie 1f38b4e343
fix: Complete CI compatibility with self-hosted GitHub runners (#134)
## Summary

This PR resolves all CI failures following the migration to self-hosted
GitHub runners (`DH-Testing` group) by eliminating sudo dependencies and
fixing Docker connectivity issues.

## Key Changes

### 🔧 **Eliminated sudo requirements across all workflows**
- **Setup Environment**: Installed mold linker and system dependencies
in userspace without sudo
- **Tool Installation**: Replaced apt/system package installations with
direct binary downloads:
  - Kurtosis: Direct binary download from GitHub releases (v1.10.3)
  - Taplo: Direct binary installation for Cargo.toml formatting
  - cargo-nextest: Using `cargo install` instead of GitHub action
    (v0.9.100)
- **Runner Cleanup**: Skipped cleanup-runner action entirely on
self-hosted runners (bare-metal manages disk space externally)

### 🐳 **Fixed Docker connectivity for E2E tests**  
- **Enhanced dockerode configuration** with robust fallback logic for
different socket locations
- **Added DOCKER_HOST environment variable** to E2E workflow for
consistent Docker daemon access
- **Implemented connection testing** with detailed error diagnostics for
troubleshooting
- **Resolves FailedToOpenSocket errors** by supporting multiple socket
paths and connection methods

### 🏷️ **Workflow optimizations**
- **Label-based targeting**: All heavy workloads (Rust builds, E2E
tests) now run on `DH-Testing` runners
- **Dependency management**: Used `install-deps: false` flag instead of
hardcoded runner detection
- **Permission fixes**: Corrected Docker build permissions and GHCR
organization names

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-09-09 21:18:50 +02:00

167 lines
6.1 KiB
YAML

# Display name of this workflow.
name: Docker Build & Publish

# Two triggers: every push to `main`, and a manual run with explicit inputs.
on:
  push:
    branches: [main]

  workflow_dispatch:
    inputs:
      # Required free-form label for the image.
      label:
        description: 'Label for the Docker image'
        required: true
        type: string
      # Required branch name to check out and build.
      branch:
        description: 'Branch to checkout and build'
        required: true
        type: string
      # Optional switch; off unless explicitly enabled.
      fast_runtime:
        description: 'Enable fast runtime features'
        required: false
        type: boolean
        default: false

jobs:
  # Builds the image from ./operator with `push: true`, then smoke-tests the
  # pushed tag by running `--help` and by starting a dev node and querying its
  # JSON-RPC endpoint on port 9944.
  build-test-push:
    runs-on:
      group: DH-runners
    outputs:
      # Last tag produced by whichever metadata step ran for this event.
      image-tag: ${{ steps.last_tag_extractor.outputs.last_tag_value }}
    defaults:
      run:
        # Applies to `run:` steps only; `uses:` steps take explicit paths.
        working-directory: operator
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Manual runs build the requested branch; pushes build the pushed ref.
          ref: ${{ github.event.inputs.branch || github.ref }}

      - uses: ./.github/workflows/actions/cleanup-runner

      # Exactly one of the two metadata steps runs, depending on the trigger.
      - name: Docker meta (dispatch)
        if: github.event_name == 'workflow_dispatch'
        id: meta-dispatch
        uses: docker/metadata-action@v5
        with:
          images: datahavenxyz/datahaven
          flavor: |
            latest=false
          tags: |
            type=raw,value=PROD-${{ github.event.inputs.label }}

      - name: Docker meta (main push)
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        id: meta-main
        uses: docker/metadata-action@v5
        with:
          images: datahavenxyz/datahaven
          flavor: |
            latest=true
          tags: |
            type=raw,value=latest

      - name: Extract last tag for job output
        id: last_tag_extractor
        env:
          # Passed through the environment instead of being pasted into the
          # script, so quotes or shell metacharacters in the metadata (which
          # includes the user-supplied label) cannot alter the command.
          META_JSON: ${{ github.event_name == 'workflow_dispatch' && steps.meta-dispatch.outputs.json || steps.meta-main.outputs.json }}
        run: |
          # `.tags[-1]` selects the last entry of the JSON `tags` array.
          echo "last_tag_value=$(printf '%s' "$META_JSON" | jq -r '.tags[-1]')" >> "$GITHUB_OUTPUT"

      - name: Log Docker Metadata
        env:
          # Environment variables keep the JSON's own double quotes from
          # terminating the echo strings below.
          META_TAGS: ${{ github.event_name == 'workflow_dispatch' && steps.meta-dispatch.outputs.tags || steps.meta-main.outputs.tags }}
          META_LABELS: ${{ github.event_name == 'workflow_dispatch' && steps.meta-dispatch.outputs.labels || steps.meta-main.outputs.labels }}
          META_JSON: ${{ github.event_name == 'workflow_dispatch' && steps.meta-dispatch.outputs.json || steps.meta-main.outputs.json }}
        run: |
          echo "Generated tags: $META_TAGS"
          echo "Generated labels: $META_LABELS"
          echo "Generated JSON: $META_JSON"

      - uses: docker/setup-qemu-action@v3

      - uses: docker/setup-buildx-action@v3

      - uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Cache key narrows from Dockerfile -> Cargo.lock -> Rust sources; the
      # restore-keys fall back through progressively broader prefixes.
      - name: Cache Mount blobs
        uses: actions/cache@v4
        id: cache
        with:
          path: |
            **/cargo-registry
            **/cargo-git
            **/sccache
          key: cache-mount-${{ hashFiles('./operator/Dockerfile') }}-${{ hashFiles('./operator/Cargo.lock') }}-${{hashFiles('./operator/runtime/**/*.rs','./operator/pallets/**/*.rs', './operator/node/**/*.rs')}}
          restore-keys: |
            cache-mount-${{ hashFiles('./operator/Dockerfile') }}-${{ hashFiles('./operator/Cargo.lock') }}
            cache-mount-${{ hashFiles('./operator/Dockerfile') }}
            cache-mount-

      # Maps each cached directory above to a target path inside the build.
      - name: Inject cache into docker
        uses: reproducible-containers/buildkit-cache-dance@v3.1.0
        with:
          cache-map: |
            {
              "cargo-registry": { "target": "/usr/local/cargo/registry" },
              "cargo-git": { "target": "/usr/local/cargo/git" },
              "sccache": { "target": "/usr/local/sccache" }
            }
          skip-extraction: ${{ steps.cache.outputs.cache-hit }}

      - name: Build and push Docker image
        id: build
        uses: docker/build-push-action@v5
        timeout-minutes: 240 # 4 hours
        with:
          context: ./operator
          file: ./operator/Dockerfile
          push: true
          tags: ${{ github.event_name == 'workflow_dispatch' && steps.meta-dispatch.outputs.tags || steps.meta-main.outputs.tags }}
          labels: ${{ github.event_name == 'workflow_dispatch' && steps.meta-dispatch.outputs.labels || steps.meta-main.outputs.labels }}
          platforms: linux/amd64
          # TRUE only for a manual run with `fast_runtime` enabled; otherwise FALSE.
          build-args: |
            FAST_RUNTIME=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fast_runtime == 'true' && 'TRUE' || 'FALSE' }}
          cache-from: type=gha,scope=datahaven-build
          cache-to: type=gha,mode=max,scope=datahaven-build
          provenance: mode=max
          sbom: true

      - name: Log build cache statistics
        run: |
          echo "Build cache statistics:"
          docker buildx du --verbose

      # --- Smoke tests ---
      - name: Pull and test node --help
        env:
          IMAGE: ${{ steps.last_tag_extractor.outputs.last_tag_value }}
        run: |
          docker pull "$IMAGE"
          docker run --rm "$IMAGE" --help

      - name: Integration test (dev chain starts)
        env:
          IMAGE: ${{ steps.last_tag_extractor.outputs.last_tag_value }}
        run: |
          # If the runner is reused between jobs, a leftover container with
          # this name would make `docker run --name` fail; remove it first.
          docker rm -f local-dh-node >/dev/null 2>&1 || true
          # No `--rm`: if the node crashes on startup the container must
          # survive so the next step can print its logs. The cleanup step at
          # the end of the job removes it.
          docker run -d -p 9944:9944 --name local-dh-node \
            "$IMAGE" --dev --unsafe-rpc-external

      - name: Wait for node to be healthy and test
        run: |
          echo "Waiting for node to start..."
          for i in {1..30}; do # Up to 30 attempts, 5s apart
            # --max-time bounds each probe so a node that accepts the
            # connection but never answers cannot stall the loop.
            if curl --fail --location --max-time 10 'http://127.0.0.1:9944' \
              --header 'Content-Type: application/json' \
              --data '{"jsonrpc":"2.0","id":1,"method":"system_chain","params":[]}' ; then
              echo "Node is healthy!"
              docker logs local-dh-node --tail 100
              exit 0
            fi
            echo "Attempt $i: Node not ready yet, sleeping 5s..."
            sleep 5
          done
          echo "Node failed to start or respond in time."
          docker logs local-dh-node --tail 100
          exit 1

      - name: Cleanup integration test container
        if: always()
        # Best-effort: the container may not exist if an earlier step failed.
        run: docker rm -f local-dh-node || true