mirror of
https://github.com/fleetdm/fleet
synced 2026-05-24 09:28:54 +00:00
Improve integration workflow robustness with health checks and detailed enrollment logging. (#32348)
Fixes #32347

# Checklist for submitter

## Testing

- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- New Features
  - Added health checks and elapsed-time logging during server startup and host enrollment in the integration workflow.
- Bug Fixes
  - Reduced flakiness by adding bounded login retries and server readiness verification before proceeding.
- Tests
  - Periodic diagnostics for host enrollment status to aid visibility during runs.
- Chores
  - Increased server startup timeout from 10 to 15 minutes in the integration workflow.
  - Minor workflow formatting cleanups for consistency.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
parent
c920007851
commit
1d9131a602
1 changed file with 59 additions and 12 deletions
71
.github/workflows/integration.yml
vendored
71
.github/workflows/integration.yml
vendored
|
|
@@ -89,20 +89,43 @@ jobs:
|
|||
check_artifacts: true
|
||||
|
||||
- name: Run Fleet server
|
||||
timeout-minutes: 10
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
chmod +x ./build/fleetctl
|
||||
./build/fleetctl preview --no-hosts --disable-open-browser
|
||||
./build/fleetctl config set --address ${{ needs.gen.outputs.address }}
|
||||
./build/fleetctl get enroll-secret
|
||||
docker compose -f ~/.fleet/preview/docker-compose.yml logs --follow fleet01 fleet02 &
|
||||
|
||||
# Ensure Fleet server is responding before waiting for enrollments
|
||||
echo "Checking Fleet server health..."
|
||||
HEALTH_CHECK_COUNT=0
|
||||
until curl -s -o /dev/null -w "%{http_code}" http://localhost:1337/healthz | grep -q "200"; do
|
||||
HEALTH_CHECK_COUNT=$((HEALTH_CHECK_COUNT + 1))
|
||||
if [ $HEALTH_CHECK_COUNT -ge 30 ]; then
|
||||
echo "ERROR: Fleet server not responding after 150 seconds"
|
||||
docker ps -a --filter "name=fleet"
|
||||
exit 1
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
echo "Fleet server is responding"
|
||||
|
||||
# Wait for all of the hosts to be enrolled
|
||||
EXPECTED=3
|
||||
ENROLLMENT_START=$(date +%s)
|
||||
until [ $(./build/fleetctl get hosts --json | grep -v "No hosts found" | wc -l | tee hostcount) -ge $EXPECTED ]; do
|
||||
echo -n "Waiting for hosts to enroll: "
|
||||
CURRENT_TIME=$(date +%s)
|
||||
ELAPSED=$((CURRENT_TIME - ENROLLMENT_START))
|
||||
echo -n "Waiting for hosts to enroll (${ELAPSED}s): "
|
||||
cat hostcount | xargs echo -n
|
||||
echo " / $EXPECTED"
|
||||
./build/fleetctl get hosts --json
|
||||
|
||||
# Show diagnostic info every 60 seconds
|
||||
if [ $((ELAPSED % 60)) -lt 10 ]; then
|
||||
./build/fleetctl get hosts --json || true
|
||||
fi
|
||||
|
||||
sleep 10
|
||||
done
|
||||
echo "Success! $EXPECTED hosts enrolled."
|
||||
|
|
@@ -172,9 +195,29 @@ jobs:
|
|||
run: |
|
||||
chmod +x ./build/fleetctl
|
||||
./build/fleetctl config set --address ${{ needs.gen.outputs.address }}
|
||||
until ./build/fleetctl login --email admin@example.com --password preview1337#
|
||||
do
|
||||
echo "Retrying in 5s..."
|
||||
|
||||
# Wait for Fleet server to be reachable first
|
||||
echo "Waiting for Fleet server to be ready..."
|
||||
ATTEMPT=0
|
||||
until curl -s -o /dev/null -w "%{http_code}" ${{ needs.gen.outputs.address }}/healthz | grep -q "200"; do
|
||||
ATTEMPT=$((ATTEMPT + 1))
|
||||
if [ $ATTEMPT -ge 60 ]; then
|
||||
echo "ERROR: Fleet server not reachable after 5 minutes"
|
||||
exit 1
|
||||
fi
|
||||
echo "Waiting for server... attempt $ATTEMPT/60"
|
||||
sleep 5
|
||||
done
|
||||
echo "Fleet server is responding, attempting login..."
|
||||
|
||||
ATTEMPT=0
|
||||
until ./build/fleetctl login --email admin@example.com --password preview1337#; do
|
||||
ATTEMPT=$((ATTEMPT + 1))
|
||||
if [ $ATTEMPT -ge 30 ]; then
|
||||
echo "ERROR: Failed to login after $ATTEMPT attempts"
|
||||
exit 1
|
||||
fi
|
||||
echo "Login attempt $ATTEMPT failed, retrying in 5s..."
|
||||
sleep 5
|
||||
done
|
||||
TOKEN=$(cat ~/.fleet/config| grep token | awk '{ print $2 }')
|
||||
|
|
@@ -229,9 +272,12 @@ jobs:
|
|||
echo "Hostname: $(hostname -s)"
|
||||
fleetctl package --type pkg --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug
|
||||
sudo installer -pkg fleet-osquery.pkg -target /
|
||||
ENROLLMENT_START=$(date +%s)
|
||||
until fleetctl get hosts | grep -iF $(hostname -s);
|
||||
do
|
||||
echo "Awaiting enrollment..."
|
||||
CURRENT_TIME=$(date +%s)
|
||||
ELAPSED=$((CURRENT_TIME - ENROLLMENT_START))
|
||||
echo "Awaiting enrollment... (${ELAPSED}s)"
|
||||
sleep 10
|
||||
done
|
||||
|
||||
|
|
@@ -290,8 +336,7 @@ jobs:
|
|||
|
||||
- name: Wait until fleet address is reachable and fleet responds
|
||||
run: |
|
||||
until curl -v -fail ${{ needs.gen.outputs.address }}/version;
|
||||
do
|
||||
until curl -v -fail ${{ needs.gen.outputs.address }}/version; do
|
||||
echo "Awaiting until fleet server responds..."
|
||||
sleep 10
|
||||
done
|
||||
|
|
@@ -307,9 +352,11 @@ jobs:
|
|||
echo "Hostname: $(hostname -s)"
|
||||
./build/fleetctl package --type deb --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug
|
||||
sudo dpkg -i fleet-osquery*
|
||||
until fleetctl get hosts | grep -iF $(hostname -s);
|
||||
do
|
||||
echo "Awaiting enrollment..."
|
||||
ENROLLMENT_START=$(date +%s)
|
||||
until fleetctl get hosts | grep -iF $(hostname -s); do
|
||||
CURRENT_TIME=$(date +%s)
|
||||
ELAPSED=$((CURRENT_TIME - ENROLLMENT_START))
|
||||
echo "Awaiting enrollment... (${ELAPSED}s)"
|
||||
sudo systemctl status orbit.service || true
|
||||
sleep 10
|
||||
done
|
||||
|
|
|
|||
Loading…
Reference in a new issue