From a0548c1e2a95f780a71d8b72370bb4e1ba3945c9 Mon Sep 17 00:00:00 2001 From: Ahmad Kaouk <56095276+ahmadkaouk@users.noreply.github.com> Date: Fri, 13 Mar 2026 11:45:48 +0100 Subject: [PATCH 1/2] docs: add deployment docs for validator set submitter (#469) ## Summary - Add a detailed settings reference table covering all config fields, environment variables, and CLI flags - Expand the observability section with a full metrics reference (counters, gauges, histograms with label values and bucket configs) - Add suggested Prometheus alerting rules for key failure modes (submitter down, missed eras, increasing errors, slow submissions) - Document the pre-built Docker Hub image (`datahavenxyz/validator-set-submitter`) and usage instructions - Add a troubleshooting guide for startup failures, missed eras, subscription exits, and debug logging - Clarify runtime/restart behavior: no built-in reconnect, in-memory attempt tracking resets on restart --- test/tools/validator-set-submitter/README.md | 212 ++++++++++++++++--- 1 file changed, 186 insertions(+), 26 deletions(-) diff --git a/test/tools/validator-set-submitter/README.md b/test/tools/validator-set-submitter/README.md index 1be7c45e..a398d4ba 100644 --- a/test/tools/validator-set-submitter/README.md +++ b/test/tools/validator-set-submitter/README.md @@ -1,6 +1,6 @@ # Validator Set Submitter -Long-running daemon that automatically submits validator-set updates from Ethereum to DataHaven each era via Snowbridge. +Daemon process that automatically submits validator-set updates from Ethereum to DataHaven each era via Snowbridge. ## How it works @@ -11,7 +11,14 @@ The submitter subscribes to finalized `Session.CurrentIndex` changes on DataHave 3. Is `ExternalIndex` already at or past `targetEra`? 4. Is the current session the last session of the era? -If all preconditions are met, it calls `sendNewValidatorSetForEra` on the ServiceManager contract. 
Each era gets a single submission attempt — if it fails, the era is missed and the submitter moves on to the next. +If all preconditions are met, it calls `sendNewValidatorSetForEra` on the ServiceManager contract. Submission attempt tracking is in-memory, so each era gets a single submission attempt per process run. If an attempt fails, that era is marked missed for this run and the submitter moves on to the next era. + +### Runtime and restart behavior + +- The submitter does not implement automatic reconnect/backoff for DataHaven session-subscription failures. +- On a subscription error, it logs the error, stops the watcher, and the process exits. +- Run it under a restart policy (for example `systemd` with `Restart=always` or Kubernetes with `restartPolicy: Always`). +- After a restart, a previously failed era may be attempted again if `ExternalIndex` has not advanced past that target era. ## Prerequisites @@ -44,6 +51,47 @@ relayer_fee: "0.2" # metrics_port: 8080 ``` +### Settings reference + +| Field | Type | Required | Default | Description | +|---|---|---|---|---| +| `ethereum_rpc_url` | string | Yes | — | Ethereum JSON-RPC endpoint | +| `datahaven_ws_url` | string | Yes | — | DataHaven WebSocket endpoint | +| `submitter_private_key` | hex string | No\* | — | Private key of the authorized submitter account (`0x` + 64 hex chars) | +| `network_id` | string | No | `"anvil"` | Network ID used to locate `contracts/deployments/{network_id}.json` | +| `service_manager_address` | hex address | No\*\* | — | ServiceManager contract address | +| `execution_fee` | string (ETH) | No | `"0.1"` | Snowbridge execution fee sent as `msg.value` | +| `relayer_fee` | string (ETH) | No | `"0.2"` | Snowbridge relayer fee sent as `msg.value` | +| `metrics_port` | integer | No | `8080` | Prometheus metrics server port (1–65535) | + +\* Required via one of: `--submitter-private-key` flag, `SUBMITTER_PRIVATE_KEY` env var, or `submitter_private_key` in config. 
+\*\* Required when running in Docker (deployment files are not included in the image). When omitted, the address is read from `contracts/deployments/{network_id}.json`. + +### Private key precedence + +The submitter private key is resolved in this order (first wins): + +1. `--submitter-private-key` CLI flag +2. `SUBMITTER_PRIVATE_KEY` environment variable +3. `submitter_private_key` in the config YAML file + +### Environment variables + +| Variable | Description | +|---|---| +| `SUBMITTER_PRIVATE_KEY` | Submitter private key (see precedence above) | +| `METRICS_PORT` | Override metrics port (takes precedence over config file, but CLI flag wins) | +| `LOG_LEVEL` | Log verbosity: `debug`, `info` (default), `warn`, `error` | + +### CLI flags + +| Flag | Description | +|---|---| +| `--config <path>` | Path to YAML config file (default: `./tools/validator-set-submitter/config.yml`) | +| `--submitter-private-key <key>` | Override submitter private key | +| `--metrics-port <port>` | Override metrics server port | +| `--dry-run` | Log what would be submitted without sending transactions | + ## Usage From the `test/` directory: @@ -65,54 +113,127 @@ bun tools/validator-set-submitter/main.ts run --submitter-private-key 0x... bun tools/validator-set-submitter/main.ts run --dry-run ``` -Private key precedence is: `--submitter-private-key` > `SUBMITTER_PRIVATE_KEY` > `submitter_private_key` in config file. 
- ## Observability -The submitter exposes a Prometheus metrics server on `metrics_port` (default `8080`): +The submitter exposes an HTTP server on `metrics_port` (default `8080`) with three endpoints: -- `GET /metrics` — Prometheus metrics -- `GET /healthz` — liveness -- `GET /readyz` — readiness (`200` once startup checks pass and watcher is running) +| Endpoint | Purpose | Codes | +|---|---|---| +| `GET /metrics` | Prometheus metrics scrape | `200` | +| `GET /healthz` | Liveness probe | `200` always | +| `GET /readyz` | Readiness probe | `200` when startup checks passed and watcher is running, `503` otherwise | -Key metrics: +### Metrics reference -- `validator_set_submitter_submissions_total{outcome="success|failed|dry_run"}` -- `validator_set_submitter_ticks_total{result="submitted_success|submitted_failed|skipped_*"}` -- `validator_set_submitter_errors_total{type="tick_error|subscription_error"}` -- `validator_set_submitter_missed_eras_total` -- `validator_set_submitter_consecutive_missed_eras` -- `validator_set_submitter_up` -- `validator_set_submitter_ready` +All metrics are prefixed with `validator_set_submitter_`. 
+ +#### Counters + +| Metric | Labels | Description | +|---|---|---| +| `submissions_total` | `outcome`: `success`, `failed`, `dry_run` | Total submission attempts by result | +| `ticks_total` | `result`: `submitted_success`, `submitted_failed`, `skipped_no_active_era`, `skipped_already_submitted`, `skipped_already_confirmed`, `skipped_not_last_session` | Tick evaluation outcomes | +| `errors_total` | `type`: `tick_error`, `subscription_error` | Non-submission errors | +| `missed_eras_total` | — | Total eras where the submission attempt failed | + +#### Gauges + +| Metric | Description | +|---|---| +| `active_era` | Current active era on DataHaven | +| `target_era` | Target era for next submission (`active_era + 1`) | +| `external_index` | Latest confirmed era on-chain | +| `current_session` | Current session number | +| `last_submitted_era` | Last era successfully submitted | +| `consecutive_missed_eras` | Consecutive missed eras (resets to 0 on success) | +| `up` | `1` if watcher is running, `0` if stopped | +| `ready` | `1` if startup checks passed and watcher running, `0` otherwise | + +#### Histograms + +| Metric | Buckets | Description | +|---|---|---| +| `submission_duration_seconds` | 1, 5, 10, 30, 60, 120, 300 | Time from transaction send to receipt | +| `tick_duration_seconds` | 0.1, 0.5, 1, 2, 5, 10, 30 | Time to process one tick | + +### Alerting recommendations + +Example Prometheus alert rules for common failure modes: + +```yaml +groups: + - name: validator-set-submitter + rules: + - alert: SubmitterDown + expr: validator_set_submitter_up == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "Validator set submitter is down" + + - alert: ConsecutiveMissedEras + expr: validator_set_submitter_consecutive_missed_eras > 0 + for: 0m + labels: + severity: critical + annotations: + summary: "Submitter has missed {{ $value }} consecutive era(s)" + + - alert: SubmissionErrorsIncreasing + expr: rate(validator_set_submitter_errors_total[5m]) > 
0 + for: 5m + labels: + severity: warning + annotations: + summary: "Submitter errors increasing (type={{ $labels.type }})" + + - alert: SlowSubmissions + expr: histogram_quantile(0.95, rate(validator_set_submitter_submission_duration_seconds_bucket[15m])) > 120 + for: 5m + labels: + severity: warning + annotations: + summary: "95th percentile submission duration exceeds 120s" +``` ## Docker -Build the image from the repository root: +A pre-built image is published to Docker Hub on every push to `main`: -```bash -docker build -f test/tools/validator-set-submitter/Dockerfile \ - -t datahavenxyz/validator-set-submitter:local . +``` +datahavenxyz/validator-set-submitter:latest +datahavenxyz/validator-set-submitter:sha-<short-sha> ``` -Run the submitter with mounted config and env private key: +Run the submitter with a mounted config and private key: ```bash docker run --rm \ - -v "$(pwd)/test/tools/validator-set-submitter/config.yml:/config/config.yml:ro" \ + -v "$(pwd)/config.yml:/config/config.yml:ro" \ -e SUBMITTER_PRIVATE_KEY=0x... \ - datahavenxyz/validator-set-submitter:local + datahavenxyz/validator-set-submitter:latest ``` Dry run: ```bash docker run --rm \ - -v "$(pwd)/test/tools/validator-set-submitter/config.yml:/config/config.yml:ro" \ + -v "$(pwd)/config.yml:/config/config.yml:ro" \ -e SUBMITTER_PRIVATE_KEY=0x... \ - datahavenxyz/validator-set-submitter:local --dry-run + datahavenxyz/validator-set-submitter:latest --dry-run ``` -The Docker image does not include `contracts/deployments/*.json`. In containerized runs, set `service_manager_address` in your config. +The Docker image does not include `contracts/deployments/*.json`. Set `service_manager_address` explicitly in your config. + +### Building locally + +To build the image from the repository root: + +```bash +docker build -f test/tools/validator-set-submitter/Dockerfile \ + -t datahavenxyz/validator-set-submitter:local . 
+``` ## Startup checks @@ -127,3 +248,42 @@ If any check fails, the process exits immediately. ## Shutdown Send `SIGINT` (Ctrl+C) or `SIGTERM`. The submitter unsubscribes from session changes and tears down connections cleanly. + +## Troubleshooting + +### Startup exits immediately + +| Symptom | Cause | Fix | +|---|---|---| +| `Cannot connect to Ethereum RPC` | Ethereum endpoint unreachable | Verify `ethereum_rpc_url` is correct and the node is running | +| `Cannot connect to DataHaven WS` | DataHaven endpoint unreachable | Verify `datahaven_ws_url` is correct and the node accepts WebSocket connections | +| `Account 0x... is not the authorized submitter` | Private key does not match the on-chain submitter | Call `setValidatorSetSubmitter` on the ServiceManager with the correct address, or fix the private key | +| `Missing submitter private key` | No key provided | Supply via `--submitter-private-key`, `SUBMITTER_PRIVATE_KEY` env var, or `submitter_private_key` in config | +| `Config file not found` | Wrong `--config` path | Check the path and ensure the file exists | + +### Missed eras + +When the submitter fails to submit for an era, `missed_eras_total` increments and `consecutive_missed_eras` increases. Common causes: + +- **Transaction reverted** — the submitter account may have insufficient ETH to cover `execution_fee + relayer_fee`. Fund the account. +- **RPC timeout** — the Ethereum RPC may be overloaded or unreachable. Check RPC health and consider a dedicated endpoint. +- **Snowbridge congestion** — if the bridge queue is full, submissions may fail. Check Snowbridge relayer status. +- **Already confirmed** — if another process submitted the era, the submitter skips it (this is normal, not an error). + +Check `LOG_LEVEL=debug` output for detailed tick-by-tick reasoning. 
+ +### Process exits after running for a while + +| Symptom | Cause | Fix | +|---|---|---| +| `Session subscription error: ...` followed by process exit | DataHaven WebSocket subscription dropped and the submitter has no built-in reconnect loop | Ensure WebSocket stability and run the submitter with automatic restarts (`systemd`/Kubernetes) | + +### Enabling debug logs + +Set the `LOG_LEVEL` environment variable to `debug` for verbose output: + +```bash +LOG_LEVEL=debug bun tools/validator-set-submitter/main.ts run +``` + +Or in Docker/Kubernetes, add `LOG_LEVEL: "debug"` to the environment. Debug logs include per-tick skip reasons and detailed transaction information. From d5390be43d0fcdda7a1683c76d95c4d7618ec6a0 Mon Sep 17 00:00:00 2001 From: Ahmad Kaouk <56095276+ahmadkaouk@users.noreply.github.com> Date: Fri, 13 Mar 2026 13:45:41 +0100 Subject: [PATCH 2/2] ci: publish validator-set-submitter Docker image on release (#467) ## Summary - Add a new reusable workflow (`task-docker-release-validator-set-submitter.yml`) to build and publish the `datahavenxyz/validator-set-submitter` Docker image to Docker Hub - Wire it into the existing `release.yml` so the submitter image is published on every push to main ## Details - **Triggers**: `workflow_call` (from `release.yml`) and `workflow_dispatch` for manual builds with a custom label/branch - **Tagging**: `latest` + `sha-<short-sha>` on CI pushes; custom label on manual dispatch - **Build**: Uses the Dockerfile at `test/tools/validator-set-submitter/Dockerfile` - **Smoke test**: Pulls the published image and runs `--help` to verify it starts correctly - **Environment**: Requires `production` environment approval before publishing ## Test plan - [ ] Trigger the release workflow manually via `workflow_dispatch` and verify the `datahavenxyz/validator-set-submitter` image is published to Docker Hub - [ ] Verify the smoke test (`--help`) passes in CI - [ ] Merge to main and confirm the submitter image is published --- 
.github/workflows/release.yml | 8 +- ...docker-release-validator-set-submitter.yml | 105 ++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/task-docker-release-validator-set-submitter.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e34ed2d1..71708d2e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,7 +18,7 @@ on: permissions: contents: read - packages: write # Required for docker-build-release + packages: write # Required for docker build release jobs concurrency: group: release-${{ github.workflow }}-${{ github.ref }} @@ -30,3 +30,9 @@ jobs: secrets: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + validator-set-submitter-docker-build-release: + uses: ./.github/workflows/task-docker-release-validator-set-submitter.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/task-docker-release-validator-set-submitter.yml b/.github/workflows/task-docker-release-validator-set-submitter.yml new file mode 100644 index 00000000..bf7a7539 --- /dev/null +++ b/.github/workflows/task-docker-release-validator-set-submitter.yml @@ -0,0 +1,105 @@ +name: Docker Build & Publish Validator Set Submitter (Release) + +on: + workflow_dispatch: + inputs: + label: + description: "Label for the Docker image" + required: true + type: string + branch: + description: "Branch to checkout and build" + required: true + type: string + workflow_call: + secrets: + DOCKERHUB_USERNAME: + description: "Docker Hub username" + required: true + DOCKERHUB_TOKEN: + description: "Docker Hub access token" + required: true + outputs: + image-tag: + description: "The tag portion of the docker image (without registry)" + value: "${{ jobs.build-test-push.outputs.image-tag }}" + +permissions: + contents: read + packages: write + 
+concurrency: + group: docker-build-release-validator-set-submitter-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-test-push: + runs-on: ubuntu-latest + # Require approval before publishing to Docker Hub + environment: production + outputs: + image-tag: ${{ steps.extract_tag.outputs.image-tag }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.branch || github.ref }} + + - uses: ./.github/workflows/actions/cleanup-runner + + # --- Docker metadata --- + - name: Docker meta (dispatch) + if: github.event_name == 'workflow_dispatch' + id: meta-dispatch + uses: docker/metadata-action@v5 + with: + images: datahavenxyz/validator-set-submitter + flavor: | + latest=false + tags: | + type=raw,value=${{ github.event.inputs.label }} + + - name: Docker meta (CI - main push) + if: github.event_name != 'workflow_dispatch' + id: meta-ci + uses: docker/metadata-action@v5 + with: + images: datahavenxyz/validator-set-submitter + flavor: | + latest=true + tags: | + type=raw,value=latest + type=sha,format=short,prefix=sha- + + - name: Extract tag for job output + id: extract_tag + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + FULL_TAG=$(echo '${{ steps.meta-dispatch.outputs.json }}' | jq -r '.tags[-1]') + else + FULL_TAG=$(echo '${{ steps.meta-ci.outputs.json }}' | jq -r '.tags[-1]') + fi + TAG_ONLY=$(echo "$FULL_TAG" | sed 's|.*:||') + echo "image-tag=$TAG_ONLY" >> $GITHUB_OUTPUT + echo "image-name=datahavenxyz/validator-set-submitter:$TAG_ONLY" >> $GITHUB_OUTPUT + + # --- Build and push Docker image --- + - name: Build and push Docker image + uses: ./.github/workflow-templates/publish-docker + with: + dockerfile: ./test/tools/validator-set-submitter/Dockerfile + context: . 
+ registry: docker.io + registry_username: ${{ secrets.DOCKERHUB_USERNAME }} + registry_password: ${{ secrets.DOCKERHUB_TOKEN }} + image_tags: ${{ steps.meta-dispatch.outputs.tags || steps.meta-ci.outputs.tags }} + image_title: "Validator Set Submitter - Release" + image_description: "Release build of DataHaven validator set submitter" + cache_scope: validator-set-submitter-release-build + + # --- Smoke tests --- + - name: Pull and test submitter --help + run: | + docker pull ${{ steps.extract_tag.outputs.image-name }} + docker run --rm ${{ steps.extract_tag.outputs.image-name }} --help