diff --git a/.github/workflows/enforce-pr-labels.yml b/.github/workflows/enforce-pr-labels.yml index 3ac91cbc..af427deb 100644 --- a/.github/workflows/enforce-pr-labels.yml +++ b/.github/workflows/enforce-pr-labels.yml @@ -11,11 +11,11 @@ jobs: steps: - uses: yogevbd/enforce-label-action@2.2.2 with: - REQUIRED_LABELS_ANY: "B0-silent,B5-clientnoteworthy,B7-runtimenoteworthy" + REQUIRED_LABELS_ANY: "B0-silent,B5-clientnoteworthy,B7-runtimenoteworthy,B9-contractsnoteworthy" REQUIRED_LABELS_ALL: "" BANNED_LABELS: "" - name: Verify breaking changes label - if: contains(github.event.pull_request.labels.*.name, 'B5-clientnoteworthy') || contains(github.event.pull_request.labels.*.name, 'B7-runtimenoteworthy') + if: contains(github.event.pull_request.labels.*.name, 'B5-clientnoteworthy') || contains(github.event.pull_request.labels.*.name, 'B7-runtimenoteworthy') || contains(github.event.pull_request.labels.*.name, 'B9-contractsnoteworthy') uses: yogevbd/enforce-label-action@2.2.2 with: REQUIRED_LABELS_ANY: "breaking,not-breaking" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e34ed2d1..71708d2e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,7 +18,7 @@ on: permissions: contents: read - packages: write # Required for docker-build-release + packages: write # Required for docker build release jobs concurrency: group: release-${{ github.workflow }}-${{ github.ref }} @@ -30,3 +30,9 @@ jobs: secrets: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + validator-set-submitter-docker-build-release: + uses: ./.github/workflows/task-docker-release-validator-set-submitter.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/task-docker-release-validator-set-submitter.yml b/.github/workflows/task-docker-release-validator-set-submitter.yml new file mode 100644 
index 00000000..73c70b13
--- /dev/null
+++ b/.github/workflows/task-docker-release-validator-set-submitter.yml
@@ -0,0 +1,119 @@
+name: Docker Build & Publish Validator Set Submitter (Release)
+
+# Builds the validator-set-submitter image, pushes it to Docker Hub and smoke-tests it.
+# A manual run (workflow_dispatch) publishes exactly the tag given in `label`; any
+# other triggering event publishes `latest` plus a short-SHA tag.
+
+on:
+  workflow_dispatch:
+    inputs:
+      label:
+        description: "Label for the Docker image"
+        required: true
+        type: string
+      branch:
+        description: "Branch to checkout and build"
+        required: true
+        type: string
+  workflow_call:
+    secrets:
+      DOCKERHUB_USERNAME:
+        description: "Docker Hub username"
+        required: true
+      DOCKERHUB_TOKEN:
+        description: "Docker Hub access token"
+        required: true
+    outputs:
+      image-tag:
+        description: "The tag portion of the docker image (without registry)"
+        value: "${{ jobs.build-test-push.outputs.image-tag }}"
+
+permissions:
+  contents: read
+  packages: write
+
+concurrency:
+  group: docker-build-release-validator-set-submitter-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-test-push:
+    runs-on: ubuntu-latest
+    # Require approval before publishing to Docker Hub
+    environment: production
+    outputs:
+      image-tag: ${{ steps.extract_tag.outputs.image-tag }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.branch || github.ref }}
+
+      - uses: ./.github/workflows/actions/cleanup-runner
+
+      # --- Docker metadata ---
+      # Exactly one of the two meta steps runs, selected by the triggering event.
+      - name: Docker meta (dispatch)
+        if: github.event_name == 'workflow_dispatch'
+        id: meta-dispatch
+        uses: docker/metadata-action@v5
+        with:
+          images: datahavenxyz/validator-set-submitter
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.event.inputs.label }}
+
+      - name: Docker meta (CI - main push)
+        if: github.event_name != 'workflow_dispatch'
+        id: meta-ci
+        uses: docker/metadata-action@v5
+        with:
+          images: datahavenxyz/validator-set-submitter
+          flavor: |
+            latest=true
+          tags: |
+            type=raw,value=latest
+            type=sha,format=short,prefix=sha-
+
+      - name: Extract tag for job output
+        id: extract_tag
+        # Expression values are handed to the script through env vars instead of
+        # being expanded into the script text, so their content is never parsed as shell.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          META_DISPATCH_JSON: ${{ steps.meta-dispatch.outputs.json }}
+          META_CI_JSON: ${{ steps.meta-ci.outputs.json }}
+        run: |
+          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
+            FULL_TAG=$(printf '%s' "$META_DISPATCH_JSON" | jq -r '.tags[-1]')
+          else
+            FULL_TAG=$(printf '%s' "$META_CI_JSON" | jq -r '.tags[-1]')
+          fi
+          # Keep only the part after the last ':' (drops the image name)
+          TAG_ONLY=$(echo "$FULL_TAG" | sed 's|.*:||')
+          echo "image-tag=$TAG_ONLY" >> "$GITHUB_OUTPUT"
+          echo "image-name=datahavenxyz/validator-set-submitter:$TAG_ONLY" >> "$GITHUB_OUTPUT"
+
+      # --- Build and push Docker image ---
+      - name: Build and push Docker image
+        uses: ./.github/workflow-templates/publish-docker
+        with:
+          dockerfile: ./test/tools/validator-set-submitter/Dockerfile
+          context: ./test
+          registry: docker.io
+          registry_username: ${{ secrets.DOCKERHUB_USERNAME }}
+          registry_password: ${{ secrets.DOCKERHUB_TOKEN }}
+          image_tags: ${{ steps.meta-dispatch.outputs.tags || steps.meta-ci.outputs.tags }}
+          image_title: "Validator Set Submitter - Release"
+          image_description: "Release build of DataHaven validator set submitter"
+          cache_scope: validator-set-submitter-release-build
+
+      # --- Smoke tests ---
+      - name: Pull and test submitter --help
+        env:
+          IMAGE_NAME: ${{ steps.extract_tag.outputs.image-name }}
+        run: |
+          docker pull "$IMAGE_NAME"
+          docker run --rm "$IMAGE_NAME" --help
diff --git a/contracts/deployments/stagenet-hoodi.json b/contracts/deployments/stagenet-hoodi.json index aea20210..1ff70446 100644 --- a/contracts/deployments/stagenet-hoodi.json +++ b/contracts/deployments/stagenet-hoodi.json @@ -4,13 +4,13 @@ "AgentExecutor": "0x35d3FdCB19A246a1763421168dF69dA3dE207063", "Gateway": "0xE9352f1488F12bFEd722c133C129ca5F467463d1", "ServiceManager": "0xED73cCaF067cebC706B2B3a6cf2b9af2c696c6d3", - "ServiceManagerImplementation": "0x5E1DA2eE025Dac2F8c391Ac86ebA20bd34c32465", - "ProxyAdmin": "0xeb1a705e1aa96e6a6329d8a8eb0f5ec38eb7b69d", + "ServiceManagerImplementation": "0x0Af4a129D0F3d57B5bD51CAB323EA114C28c064a", "RewardsAgent": "0x2E039a88838241d1Ac738cf2e3C5763ba12571e7", "DelegationManager": "0x867837a9722C512e0862d8c2E15b8bE220E8b87d", "StrategyManager": 
"0xeE45e76ddbEDdA2918b8C7E3035cd37Eab3b5D41", "AVSDirectory": "0xD58f6844f79eB1fbd9f7091d05f7cb30d3363926", "RewardsCoordinator": "0x29e8572678e0c272350aa0b4B8f304E47EBcd5e7", "AllocationManager": "0x95a7431400F362F3647a69535C5666cA0133CAA0", - "PermissionController": "0xdcCF401fD121d8C542E96BC1d0078884422aFAD2" -} + "PermissionController": "0xdcCF401fD121d8C542E96BC1d0078884422aFAD2", + "ProxyAdmin": "0xeb1a705e1aa96e6a6329d8a8eb0f5ec38eb7b69d" +} \ No newline at end of file diff --git a/contracts/script/deploy/DeployBase.s.sol b/contracts/script/deploy/DeployBase.s.sol index 145c54aa..27f0d6fd 100644 --- a/contracts/script/deploy/DeployBase.s.sol +++ b/contracts/script/deploy/DeployBase.s.sol @@ -131,7 +131,8 @@ abstract contract DeployBase is Script, DeployParams, Accounts { // Deploy DataHaven contracts (same for both modes) ( DataHavenServiceManager serviceManager, - DataHavenServiceManager serviceManagerImplementation + DataHavenServiceManager serviceManagerImplementation, + ProxyAdmin actualProxyAdmin ) = _deployDataHavenContracts(avsConfig, proxyAdmin, gateway, agentAddress); Logging.logFooter(); @@ -151,7 +152,7 @@ abstract contract DeployBase is Script, DeployParams, Accounts { serviceManager, serviceManagerImplementation, agentAddress, - proxyAdmin + actualProxyAdmin ); _outputAgentInfo(agentAddress, snowbridgeConfig.messageOrigin); @@ -242,7 +243,7 @@ abstract contract DeployBase is Script, DeployParams, Accounts { ProxyAdmin proxyAdmin, IGatewayV2 gateway, address agentAddress - ) internal returns (DataHavenServiceManager, DataHavenServiceManager) { + ) internal returns (DataHavenServiceManager, DataHavenServiceManager, ProxyAdmin) { Logging.logHeader("DATAHAVEN CUSTOM CONTRACTS DEPLOYMENT"); // Deploy the Service Manager @@ -278,7 +279,7 @@ abstract contract DeployBase is Script, DeployParams, Accounts { }); // Create the service manager proxy (different logic for local vs testnet) - DataHavenServiceManager serviceManager = + (DataHavenServiceManager 
serviceManager, ProxyAdmin actualProxyAdmin) = _createServiceManagerProxy(serviceManagerImplementation, proxyAdmin, initParams); Logging.logContractDeployed("ServiceManager Proxy", address(serviceManager)); @@ -293,17 +294,19 @@ abstract contract DeployBase is Script, DeployParams, Accounts { Logging.logInfo("TX EXECUTION DISABLED: call updateAVSMetadataURI via multisig"); } - return (serviceManager, serviceManagerImplementation); + return (serviceManager, serviceManagerImplementation, actualProxyAdmin); } /** * @notice Create service manager proxy - implementation varies by deployment type + * @return serviceManager The proxied ServiceManager instance + * @return actualProxyAdmin The ProxyAdmin that controls the proxy (may differ from the input for live deployments) */ function _createServiceManagerProxy( DataHavenServiceManager implementation, ProxyAdmin proxyAdmin, ServiceManagerInitParams memory params - ) internal virtual returns (DataHavenServiceManager); + ) internal virtual returns (DataHavenServiceManager serviceManager, ProxyAdmin actualProxyAdmin); /** * @notice Output deployed addresses with mode-specific logic diff --git a/contracts/script/deploy/DeployLive.s.sol b/contracts/script/deploy/DeployLive.s.sol index 3becd30e..6530a6c9 100644 --- a/contracts/script/deploy/DeployLive.s.sol +++ b/contracts/script/deploy/DeployLive.s.sol @@ -109,7 +109,7 @@ contract DeployLive is DeployBase { DataHavenServiceManager implementation, ProxyAdmin, // Ignored for live deployment ServiceManagerInitParams memory params - ) internal override returns (DataHavenServiceManager) { + ) internal override returns (DataHavenServiceManager, ProxyAdmin) { // Live deployment creates its own ProxyAdmin for the service manager vm.broadcast(_deployerPrivateKey); ProxyAdmin proxyAdmin = new ProxyAdmin(); @@ -117,7 +117,7 @@ contract DeployLive is DeployBase { // Transfer ProxyAdmin ownership to AVS owner so upgrades can only be performed by AVS owner 
vm.broadcast(_deployerPrivateKey); - proxyAdmin.transferOwnership(_avsOwner); + proxyAdmin.transferOwnership(params.avsOwner); Logging.logStep("ProxyAdmin ownership transferred to AVS owner"); vm.broadcast(_deployerPrivateKey); @@ -134,7 +134,7 @@ contract DeployLive is DeployBase { TransparentUpgradeableProxy proxy = new TransparentUpgradeableProxy(address(implementation), address(proxyAdmin), initData); - return DataHavenServiceManager(address(proxy)); + return (DataHavenServiceManager(address(proxy)), proxyAdmin); } function _outputDeployedAddresses( diff --git a/contracts/script/deploy/DeployLocal.s.sol b/contracts/script/deploy/DeployLocal.s.sol index 25ee2ebb..4c41cb0e 100644 --- a/contracts/script/deploy/DeployLocal.s.sol +++ b/contracts/script/deploy/DeployLocal.s.sol @@ -197,7 +197,7 @@ contract DeployLocal is DeployBase { DataHavenServiceManager implementation, ProxyAdmin proxyAdmin, ServiceManagerInitParams memory params - ) internal override returns (DataHavenServiceManager) { + ) internal override returns (DataHavenServiceManager, ProxyAdmin) { // Prepare strategies for service manager (local deployment has deployed strategies) _prepareStrategiesForServiceManager(params); @@ -215,7 +215,7 @@ contract DeployLocal is DeployBase { TransparentUpgradeableProxy proxy = new TransparentUpgradeableProxy(address(implementation), address(proxyAdmin), initData); - return DataHavenServiceManager(address(proxy)); + return (DataHavenServiceManager(address(proxy)), proxyAdmin); } function _outputDeployedAddresses( diff --git a/test/cli/handlers/contracts/upgrade.ts b/test/cli/handlers/contracts/upgrade.ts index 31d27304..50c30341 100644 --- a/test/cli/handlers/contracts/upgrade.ts +++ b/test/cli/handlers/contracts/upgrade.ts @@ -4,12 +4,13 @@ import path from "node:path"; import { logger, printDivider } from "utils"; import { type Deployments, parseDeploymentsFile } from "utils/contracts"; import { encodeFunctionData } from "viem"; -import { CHAIN_CONFIGS } from 
"../../../configs/contracts/config"; +import { buildNetworkId, CHAIN_CONFIGS } from "../../../configs/contracts/config"; import { buildContracts } from "../../../scripts/deploy-contracts"; import { verifyContracts } from "./verify"; interface ContractsUpgradeOptions { chain: string; + environment?: string; rpcUrl?: string; privateKeyFile?: string; verify?: boolean; @@ -114,10 +115,14 @@ const executeCommand = async ( */ export const contractsUpgrade = async (options: ContractsUpgradeOptions) => { const isDryRun = !options.execute; + const networkId = buildNetworkId(options.chain, options.environment); try { logger.info("🔄 Starting contract upgrade..."); logger.info(`📡 Using chain: ${options.chain}`); + if (options.environment) { + logger.info(`📡 Using environment: ${options.environment}`); + } if (isDryRun) { logger.info( "â„šī¸ Dry-run mode: the proxy upgrade transaction will NOT be broadcast. Calldata will be printed for manual multisig execution." @@ -149,19 +154,19 @@ export const contractsUpgrade = async (options: ContractsUpgradeOptions) => { // Deploy new implementation contracts (signed by deployer — any funded account) const serviceManagerImplAddress = await deployImplementationContracts( - options.chain, + networkId, rpcUrl, deployerKey ); if (isDryRun) { // Print the calldata for the proxy upgrade so the multisig team can execute it - await printProxyUpgradeCalldata(options.chain, serviceManagerImplAddress, targetVersion); + await printProxyUpgradeCalldata(networkId, serviceManagerImplAddress, targetVersion); } else { // Update proxy contracts to point to new implementations AND update version in one transaction. // Must be signed by the AVS owner, who owns both the ProxyAdmin and the ServiceManager. 
await updateProxyContracts( - options.chain, + networkId, rpcUrl, avsOwnerKey as string, serviceManagerImplAddress, @@ -173,6 +178,7 @@ export const contractsUpgrade = async (options: ContractsUpgradeOptions) => { logger.info("🔍 Verifying upgraded contracts..."); await verifyContracts({ chain: options.chain, + environment: options.environment, rpcUrl, skipVerification: false }); @@ -195,7 +201,7 @@ export const contractsUpgrade = async (options: ContractsUpgradeOptions) => { * Deploys only the implementation contracts */ const deployImplementationContracts = async ( - chain: string, + networkId: string, rpcUrl: string, privateKey: string ): Promise => { @@ -203,15 +209,15 @@ const deployImplementationContracts = async ( // Deploy new ServiceManager implementation const serviceManagerImplAddress = await deployServiceManagerImplementation( - chain, + networkId, rpcUrl, privateKey ); logger.success(`ServiceManager Implementation deployed: ${serviceManagerImplAddress}`); // Persist the new implementation address so it becomes the source-of-truth for subsequent steps. 
- const deploymentPath = `../contracts/deployments/${chain}.json`; - const currentDeployments = await parseDeploymentsFile(chain); + const deploymentPath = `../contracts/deployments/${networkId}.json`; + const currentDeployments = await parseDeploymentsFile(networkId); const updatedDeployments = { ...currentDeployments, ServiceManagerImplementation: serviceManagerImplAddress as `0x${string}` @@ -226,13 +232,13 @@ const deployImplementationContracts = async ( * Deploys new ServiceManager implementation contract */ const deployServiceManagerImplementation = async ( - chain: string, + networkId: string, rpcUrl: string, privateKey: string ): Promise => { logger.info("đŸ“Ļ Deploying ServiceManager implementation..."); - const actualDeployments = await parseDeploymentsFile(chain); + const actualDeployments = await parseDeploymentsFile(networkId); // Note: Private key is passed via PRIVATE_KEY environment variable (not command-line) // to prevent it from appearing in system process lists (security best practice) @@ -315,11 +321,11 @@ const PROXY_ADMIN_ABI = [ * The call combines the proxy upgrade and the version update in one atomic transaction. */ const printProxyUpgradeCalldata = async ( - chain: string, + networkId: string, serviceManagerImplAddress: string, version: string ) => { - const deployments = await parseDeploymentsFile(chain); + const deployments = await parseDeploymentsFile(networkId); const proxyAdmin = deployments.ProxyAdmin ?? 
process.env.PROXY_ADMIN; if (!proxyAdmin) { @@ -380,7 +386,7 @@ const printProxyUpgradeCalldata = async ( * Updates proxy contracts to point to new implementations and sets version */ const updateProxyContracts = async ( - chain: string, + networkId: string, rpcUrl: string, avsOwnerKey: string, serviceManagerImplAddress: string, @@ -388,7 +394,7 @@ const updateProxyContracts = async ( ) => { logger.info("🔄 Updating proxy contracts and version..."); - const deployments = await parseDeploymentsFile(chain); + const deployments = await parseDeploymentsFile(networkId); // Update ServiceManager proxy to point to new implementation and update version in one transaction await updateServiceManagerProxyWithVersion( @@ -438,7 +444,11 @@ const updateServiceManagerProxyWithVersion = async ( // about using the default sender when vm.broadcast is called with a key loaded // from an environment variable rather than --private-key. const { privateKeyToAccount } = await import("viem/accounts"); - const avsOwnerAddress = privateKeyToAccount(avsOwnerKey as `0x${string}`).address; + const normalizedAvsKey = ( + avsOwnerKey.startsWith("0x") ? avsOwnerKey : `0x${avsOwnerKey}` + ) as `0x${string}`; + const avsOwnerAddress = privateKeyToAccount(normalizedAvsKey).address; + logger.info(`🔑 Proxy upgrade will be signed by AVS owner: ${avsOwnerAddress}`); const updateArgs = [ "script", @@ -459,8 +469,22 @@ const updateServiceManagerProxyWithVersion = async ( logger.success(`ServiceManager proxy updated and version set to ${version}`); logger.debug(result); } catch (error) { - logger.error(`❌ Failed to update ServiceManager proxy: ${error}`); - throw error; + const errorMessage = error instanceof Error ? error.message : String(error); + + // Forge may fail to fetch the transaction receipt from the RPC even though the + // transaction was successfully broadcast and confirmed on-chain. Detect this + // specific failure and downgrade it to a warning instead of a hard error. 
+ if (errorMessage.includes("Failure on receiving a receipt for")) { + const txHashMatch = errorMessage.match(/receipt for (0x[a-fA-F0-9]{64})/); + const txHash = txHashMatch ? txHashMatch[1] : "unknown"; + logger.warn( + `âš ī¸ Forge could not fetch the transaction receipt (tx: ${txHash}), but the transaction was likely broadcast successfully. ` + + "Verify the transaction status on a block explorer before proceeding." + ); + } else { + logger.error(`❌ Failed to update ServiceManager proxy: ${error}`); + throw error; + } } }; diff --git a/test/cli/handlers/contracts/verify.ts b/test/cli/handlers/contracts/verify.ts index 5d6bf98b..742ef4e2 100644 --- a/test/cli/handlers/contracts/verify.ts +++ b/test/cli/handlers/contracts/verify.ts @@ -109,7 +109,20 @@ export const verifyContracts = async (options: ContractsVerifyOptions) => { contractPath: "lib/snowbridge/contracts/src/AgentExecutor.sol", constructorArgs: [], constructorArgTypes: [] - } + }, + ...(deployments.ProxyAdmin + ? [ + { + name: "ProxyAdmin", + address: deployments.ProxyAdmin, + artifactName: "ProxyAdmin", + contractPath: + "lib/eigenlayer-contracts/lib/openzeppelin-contracts-v4.9.0/contracts/proxy/transparent/ProxyAdmin.sol", + constructorArgs: [], + constructorArgTypes: [] + } + ] + : []) ]; if (!gatewayImplAddress) { diff --git a/test/cli/index.ts b/test/cli/index.ts index 27763287..338bf38f 100644 --- a/test/cli/index.ts +++ b/test/cli/index.ts @@ -289,6 +289,10 @@ contractsCommand .command("upgrade") .description("Upgrade DataHaven AVS contracts by deploying new implementations") .option("--chain ", "Target chain (hoodi, mainnet, anvil)") + .option( + "--environment ", + "Deployment environment (stagenet, testnet, mainnet). Config and deployment files will be prefixed with this value." 
+ ) .option("--rpc-url ", "Chain RPC URL (optional, defaults based on chain)") .option("--private-key-file ", "Path to file containing private key for deployment") .option("--verify", "Verify upgraded contracts on block explorer", false) @@ -303,7 +307,7 @@ contractsCommand ) .hook("preAction", contractsPreActionHook) .action(async (options: any, command: any) => { - // Try to get chain from options or command + // Try to get chain and environment from options or parent command let chain = options.chain; if (!chain && command.parent) { chain = command.parent.getOptionValue("chain"); @@ -312,11 +316,18 @@ contractsCommand chain = command.getOptionValue("chain"); } - printHeader(`Upgrading DataHaven Contracts on ${chain}`); + let environment = options.environment; + if (!environment && command.parent) { + environment = command.parent.getOptionValue("environment"); + } + + const displayName = environment ? `${environment}-${chain}` : chain; + printHeader(`Upgrading DataHaven Contracts on ${displayName}`); try { await contractsUpgrade({ chain: chain, + environment: environment, rpcUrl: options.rpcUrl, privateKeyFile: options.privateKeyFile, verify: options.verify, diff --git a/test/tools/validator-set-submitter/README.md b/test/tools/validator-set-submitter/README.md index 1be7c45e..a398d4ba 100644 --- a/test/tools/validator-set-submitter/README.md +++ b/test/tools/validator-set-submitter/README.md @@ -1,6 +1,6 @@ # Validator Set Submitter -Long-running daemon that automatically submits validator-set updates from Ethereum to DataHaven each era via Snowbridge. +Daemon process that automatically submits validator-set updates from Ethereum to DataHaven each era via Snowbridge. ## How it works @@ -11,7 +11,14 @@ The submitter subscribes to finalized `Session.CurrentIndex` changes on DataHave 3. Is `ExternalIndex` already at or past `targetEra`? 4. Is the current session the last session of the era? 
-If all preconditions are met, it calls `sendNewValidatorSetForEra` on the ServiceManager contract. Each era gets a single submission attempt — if it fails, the era is missed and the submitter moves on to the next. +If all preconditions are met, it calls `sendNewValidatorSetForEra` on the ServiceManager contract. Submission attempt tracking is in-memory, so each era gets a single submission attempt per process run. If an attempt fails, that era is marked missed for this run and the submitter moves on to the next era. + +### Runtime and restart behavior + +- The submitter does not implement automatic reconnect/backoff for DataHaven session-subscription failures. +- On a subscription error, it logs the error, stops the watcher, and the process exits. +- Run it under a restart policy (for example `systemd` with `Restart=always` or Kubernetes with `restartPolicy: Always`). +- After a restart, a previously failed era may be attempted again if `ExternalIndex` has not advanced past that target era. 
## Prerequisites @@ -44,6 +51,47 @@ relayer_fee: "0.2" # metrics_port: 8080 ``` +### Settings reference + +| Field | Type | Required | Default | Description | +|---|---|---|---|---| +| `ethereum_rpc_url` | string | Yes | — | Ethereum JSON-RPC endpoint | +| `datahaven_ws_url` | string | Yes | — | DataHaven WebSocket endpoint | +| `submitter_private_key` | hex string | No\* | — | Private key of the authorized submitter account (`0x` + 64 hex chars) | +| `network_id` | string | No | `"anvil"` | Network ID used to locate `contracts/deployments/{network_id}.json` | +| `service_manager_address` | hex address | No\*\* | — | ServiceManager contract address | +| `execution_fee` | string (ETH) | No | `"0.1"` | Snowbridge execution fee sent as `msg.value` | +| `relayer_fee` | string (ETH) | No | `"0.2"` | Snowbridge relayer fee sent as `msg.value` | +| `metrics_port` | integer | No | `8080` | Prometheus metrics server port (1–65535) | + +\* Required via one of: `--submitter-private-key` flag, `SUBMITTER_PRIVATE_KEY` env var, or `submitter_private_key` in config. +\*\* Required when running in Docker (deployment files are not included in the image). When omitted, the address is read from `contracts/deployments/{network_id}.json`. + +### Private key precedence + +The submitter private key is resolved in this order (first wins): + +1. `--submitter-private-key` CLI flag +2. `SUBMITTER_PRIVATE_KEY` environment variable +3. 
`submitter_private_key` in the config YAML file + +### Environment variables + +| Variable | Description | +|---|---| +| `SUBMITTER_PRIVATE_KEY` | Submitter private key (see precedence above) | +| `METRICS_PORT` | Override metrics port (takes precedence over config file, but CLI flag wins) | +| `LOG_LEVEL` | Log verbosity: `debug`, `info` (default), `warn`, `error` | + +### CLI flags + +| Flag | Description | +|---|---| +| `--config <path>` | Path to YAML config file (default: `./tools/validator-set-submitter/config.yml`) | +| `--submitter-private-key <key>` | Override submitter private key | +| `--metrics-port <port>` | Override metrics server port | +| `--dry-run` | Log what would be submitted without sending transactions | + ## Usage From the `test/` directory: @@ -65,54 +113,127 @@ bun tools/validator-set-submitter/main.ts run --submitter-private-key 0x... bun tools/validator-set-submitter/main.ts run --dry-run ``` -Private key precedence is: `--submitter-private-key` > `SUBMITTER_PRIVATE_KEY` > `submitter_private_key` in config file. 
- ## Observability -The submitter exposes a Prometheus metrics server on `metrics_port` (default `8080`): +The submitter exposes an HTTP server on `metrics_port` (default `8080`) with three endpoints: -- `GET /metrics` — Prometheus metrics -- `GET /healthz` — liveness -- `GET /readyz` — readiness (`200` once startup checks pass and watcher is running) +| Endpoint | Purpose | Codes | +|---|---|---| +| `GET /metrics` | Prometheus metrics scrape | `200` | +| `GET /healthz` | Liveness probe | `200` always | +| `GET /readyz` | Readiness probe | `200` when startup checks passed and watcher is running, `503` otherwise | -Key metrics: +### Metrics reference -- `validator_set_submitter_submissions_total{outcome="success|failed|dry_run"}` -- `validator_set_submitter_ticks_total{result="submitted_success|submitted_failed|skipped_*"}` -- `validator_set_submitter_errors_total{type="tick_error|subscription_error"}` -- `validator_set_submitter_missed_eras_total` -- `validator_set_submitter_consecutive_missed_eras` -- `validator_set_submitter_up` -- `validator_set_submitter_ready` +All metrics are prefixed with `validator_set_submitter_`. 
+ +#### Counters + +| Metric | Labels | Description | +|---|---|---| +| `submissions_total` | `outcome`: `success`, `failed`, `dry_run` | Total submission attempts by result | +| `ticks_total` | `result`: `submitted_success`, `submitted_failed`, `skipped_no_active_era`, `skipped_already_submitted`, `skipped_already_confirmed`, `skipped_not_last_session` | Tick evaluation outcomes | +| `errors_total` | `type`: `tick_error`, `subscription_error` | Non-submission errors | +| `missed_eras_total` | — | Total eras where the submission attempt failed | + +#### Gauges + +| Metric | Description | +|---|---| +| `active_era` | Current active era on DataHaven | +| `target_era` | Target era for next submission (`active_era + 1`) | +| `external_index` | Latest confirmed era on-chain | +| `current_session` | Current session number | +| `last_submitted_era` | Last era successfully submitted | +| `consecutive_missed_eras` | Consecutive missed eras (resets to 0 on success) | +| `up` | `1` if watcher is running, `0` if stopped | +| `ready` | `1` if startup checks passed and watcher running, `0` otherwise | + +#### Histograms + +| Metric | Buckets | Description | +|---|---|---| +| `submission_duration_seconds` | 1, 5, 10, 30, 60, 120, 300 | Time from transaction send to receipt | +| `tick_duration_seconds` | 0.1, 0.5, 1, 2, 5, 10, 30 | Time to process one tick | + +### Alerting recommendations + +Example Prometheus alert rules for common failure modes: + +```yaml +groups: + - name: validator-set-submitter + rules: + - alert: SubmitterDown + expr: validator_set_submitter_up == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "Validator set submitter is down" + + - alert: ConsecutiveMissedEras + expr: validator_set_submitter_consecutive_missed_eras > 0 + for: 0m + labels: + severity: critical + annotations: + summary: "Submitter has missed {{ $value }} consecutive era(s)" + + - alert: SubmissionErrorsIncreasing + expr: rate(validator_set_submitter_errors_total[5m]) > 
0 + for: 5m + labels: + severity: warning + annotations: + summary: "Submitter errors increasing (type={{ $labels.type }})" + + - alert: SlowSubmissions + expr: histogram_quantile(0.95, rate(validator_set_submitter_submission_duration_seconds_bucket[15m])) > 120 + for: 5m + labels: + severity: warning + annotations: + summary: "95th percentile submission duration exceeds 120s" +``` ## Docker -Build the image from the repository root: +A pre-built image is published to Docker Hub on every push to `main`: -```bash -docker build -f test/tools/validator-set-submitter/Dockerfile \ - -t datahavenxyz/validator-set-submitter:local . +``` +datahavenxyz/validator-set-submitter:latest +datahavenxyz/validator-set-submitter:sha-<short-sha> ``` -Run the submitter with mounted config and env private key: +Run the submitter with a mounted config and private key: ```bash docker run --rm \ - -v "$(pwd)/test/tools/validator-set-submitter/config.yml:/config/config.yml:ro" \ + -v "$(pwd)/config.yml:/config/config.yml:ro" \ -e SUBMITTER_PRIVATE_KEY=0x... \ - datahavenxyz/validator-set-submitter:local + datahavenxyz/validator-set-submitter:latest ``` Dry run: ```bash docker run --rm \ - -v "$(pwd)/test/tools/validator-set-submitter/config.yml:/config/config.yml:ro" \ + -v "$(pwd)/config.yml:/config/config.yml:ro" \ -e SUBMITTER_PRIVATE_KEY=0x... \ - datahavenxyz/validator-set-submitter:local --dry-run + datahavenxyz/validator-set-submitter:latest --dry-run ``` -The Docker image does not include `contracts/deployments/*.json`. In containerized runs, set `service_manager_address` in your config. +The Docker image does not include `contracts/deployments/*.json`. Set `service_manager_address` explicitly in your config. + +### Building locally + +To build the image from the repository root: + +```bash +docker build -f test/tools/validator-set-submitter/Dockerfile \ + -t datahavenxyz/validator-set-submitter:local . 
+``` ## Startup checks @@ -127,3 +248,42 @@ If any check fails, the process exits immediately. ## Shutdown Send `SIGINT` (Ctrl+C) or `SIGTERM`. The submitter unsubscribes from session changes and tears down connections cleanly. + +## Troubleshooting + +### Startup exits immediately + +| Symptom | Cause | Fix | +|---|---|---| +| `Cannot connect to Ethereum RPC` | Ethereum endpoint unreachable | Verify `ethereum_rpc_url` is correct and the node is running | +| `Cannot connect to DataHaven WS` | DataHaven endpoint unreachable | Verify `datahaven_ws_url` is correct and the node accepts WebSocket connections | +| `Account 0x... is not the authorized submitter` | Private key does not match the on-chain submitter | Call `setValidatorSetSubmitter` on the ServiceManager with the correct address, or fix the private key | +| `Missing submitter private key` | No key provided | Supply via `--submitter-private-key`, `SUBMITTER_PRIVATE_KEY` env var, or `submitter_private_key` in config | +| `Config file not found` | Wrong `--config` path | Check the path and ensure the file exists | + +### Missed eras + +When the submitter fails to submit for an era, `missed_eras_total` increments and `consecutive_missed_eras` increases. Common causes: + +- **Transaction reverted** — the submitter account may have insufficient ETH to cover `execution_fee + relayer_fee`. Fund the account. +- **RPC timeout** — the Ethereum RPC may be overloaded or unreachable. Check RPC health and consider a dedicated endpoint. +- **Snowbridge congestion** — if the bridge queue is full, submissions may fail. Check Snowbridge relayer status. +- **Already confirmed** — if another process submitted the era, the submitter skips it (this is normal, not an error). + +Check `LOG_LEVEL=debug` output for detailed tick-by-tick reasoning. 
+ +### Process exits after running for a while + +| Symptom | Cause | Fix | +|---|---|---| +| `Session subscription error: ...` followed by process exit | DataHaven WebSocket subscription dropped and the submitter has no built-in reconnect loop | Ensure WebSocket stability and run the submitter with automatic restarts (`systemd`/Kubernetes) | + +### Enabling debug logs + +Set the `LOG_LEVEL` environment variable to `debug` for verbose output: + +```bash +LOG_LEVEL=debug bun tools/validator-set-submitter/main.ts run +``` + +Or in Docker/Kubernetes, add `LOG_LEVEL: "debug"` to the environment. Debug logs include per-tick skip reasons and detailed transaction information.