mirror of
https://github.com/fleetdm/fleet
synced 2026-05-24 09:28:54 +00:00
<!-- Add the related story/sub-task/bug number, like Resolves #123, or remove if NA -->
**Related issue:** Resolves #35239

Docs PR: #39770

## Remote trigger approach

When FLEET_VULNERABILITIES_DISABLE_SCHEDULE=true, the main Fleet server registers a RemoteTriggerSchedule instead of the real vulnerability schedule. When a user runs fleetctl trigger --name=vulnerabilities:

1. Main server: RemoteTriggerSchedule.Trigger() inserts a cron_stats record with status=queued.
2. Worker server: The vulnerability schedule runs with WithTriggerPollInterval(60s), which starts a poll goroutine that checks the DB every 60s for queued records.
3. Pickup: When the poll goroutine finds a queued record, it sends the stats ID on the trigger channel (non-blocking).
4. Execution: The trigger handler acquires the lock, claims the record via ClaimCronStats (updating status to pending and instance to the actual worker ID), runs all jobs, and marks it completed.

Key details:

- The trigger channel carries an int: 0 for in-process triggers, >0 for DB-polled stats IDs. This lets runWithStats reuse the existing record instead of inserting a new one.
- Both Schedule.Trigger() and RemoteTriggerSchedule.Trigger() treat pending and queued as conflicts to prevent duplicate runs.
- Queued records expire after 2 hours via CleanupCronStats, same as pending records.
- The poll goroutine only signals; it doesn't modify DB state. The handler claims when ready.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

* **New Features**
  * Added support for remote trigger execution in vulnerability scheduling workflows.
  * Implemented periodic polling mechanism to detect and process externally triggered vulnerability scans.
* **Bug Fixes**
  * Enhanced trigger status tracking to properly handle queued scan jobs.
* **Improvements**
  * Strengthened scheduling system with improved timeout and cancellation management capabilities.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
140 lines
4.5 KiB
Go
140 lines
4.5 KiB
Go
package mysql
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"encoding/json"
|
|
|
|
"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
|
|
"github.com/fleetdm/fleet/v4/server/fleet"
|
|
"github.com/jmoiron/sqlx"
|
|
)
|
|
|
|
// GetLatestCronStats returns a slice of no more than two cron stats records, where index 0 (if
|
|
// present) is the most recently created scheduled run, and index 1 (if present) represents a
|
|
// triggered run that is currently pending/queued.
|
|
func (ds *Datastore) GetLatestCronStats(ctx context.Context, name string) ([]fleet.CronStats, error) {
|
|
stmt := `
|
|
(
|
|
SELECT
|
|
id, name, instance, stats_type, status, created_at, updated_at
|
|
FROM
|
|
cron_stats
|
|
WHERE
|
|
name = ?
|
|
AND stats_type = 'scheduled'
|
|
AND (status = 'pending' OR status = 'completed')
|
|
ORDER BY
|
|
created_at DESC
|
|
LIMIT 1)
|
|
UNION
|
|
(
|
|
SELECT
|
|
id, name, instance, stats_type, status, created_at, updated_at
|
|
FROM
|
|
cron_stats
|
|
WHERE
|
|
name = ?
|
|
AND stats_type = 'triggered'
|
|
AND (status = 'pending' OR status = 'completed' OR status = 'queued')
|
|
ORDER BY
|
|
created_at DESC
|
|
LIMIT 1)`
|
|
|
|
var res []fleet.CronStats
|
|
err := sqlx.SelectContext(ctx, ds.reader(ctx), &res, stmt, name, name)
|
|
if err != nil {
|
|
return []fleet.CronStats{}, ctxerr.Wrap(ctx, err, "select cron stats")
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
func (ds *Datastore) InsertCronStats(ctx context.Context, statsType fleet.CronStatsType, name string, instance string, status fleet.CronStatsStatus) (int, error) {
|
|
stmt := `INSERT INTO cron_stats (stats_type, name, instance, status) VALUES (?, ?, ?, ?)`
|
|
|
|
res, err := ds.writer(ctx).ExecContext(ctx, stmt, statsType, name, instance, status)
|
|
if err != nil {
|
|
return 0, ctxerr.Wrap(ctx, err, "insert cron stats")
|
|
}
|
|
id, err := res.LastInsertId()
|
|
if err != nil {
|
|
return 0, ctxerr.Wrap(ctx, err, "insert cron stats last insert id")
|
|
}
|
|
|
|
return int(id), nil
|
|
}
|
|
|
|
func (ds *Datastore) UpdateCronStats(ctx context.Context, id int, status fleet.CronStatsStatus, cronErrors *fleet.CronScheduleErrors) error {
|
|
stmt := `UPDATE cron_stats SET status = ?, errors = ? WHERE id = ?`
|
|
|
|
errorsJSON := sql.NullString{}
|
|
if len(*cronErrors) > 0 {
|
|
b, err := json.Marshal(cronErrors)
|
|
if err == nil {
|
|
errorsJSON.String = string(b)
|
|
errorsJSON.Valid = true
|
|
}
|
|
}
|
|
|
|
if _, err := ds.writer(ctx).ExecContext(ctx, stmt, status, errorsJSON, id); err != nil {
|
|
return ctxerr.Wrap(ctx, err, "update cron stats")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (ds *Datastore) ClaimCronStats(ctx context.Context, id int, instance string, status fleet.CronStatsStatus) error {
|
|
stmt := `UPDATE cron_stats SET status = ?, instance = ? WHERE id = ?`
|
|
|
|
if _, err := ds.writer(ctx).ExecContext(ctx, stmt, status, instance, id); err != nil {
|
|
return ctxerr.Wrap(ctx, err, "claim cron stats")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (ds *Datastore) UpdateAllCronStatsForInstance(ctx context.Context, instance string, fromStatus fleet.CronStatsStatus, toStatus fleet.CronStatsStatus) error {
|
|
stmt := `UPDATE cron_stats SET status = ? WHERE instance = ? AND status = ?`
|
|
|
|
if _, err := ds.writer(ctx).ExecContext(ctx, stmt, toStatus, instance, fromStatus); err != nil {
|
|
return ctxerr.Wrap(ctx, err, "update all cron stats for instance")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (ds *Datastore) CleanupCronStats(ctx context.Context) error {
|
|
return ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error {
|
|
// Delete cron_stats entries that are older than two days.
|
|
deleteStmt := `DELETE FROM cron_stats WHERE created_at < DATE_SUB(NOW(), INTERVAL 2 DAY)`
|
|
if _, err := tx.ExecContext(ctx, deleteStmt); err != nil {
|
|
return ctxerr.Wrap(ctx, err, "deleting old cron stats")
|
|
}
|
|
// Mark cron_stats entries as expired if:
|
|
// 1. Pending or queued for >2 hours and no active lock (instance likely crashed), OR
|
|
// 2. Pending or queued for >12 hours regardless of lock state (hard cap for hung jobs).
|
|
//
|
|
// NOTE: The lock check assumes locks.name matches cron_stats.name. Schedules using
|
|
// WithAltLockID (e.g., "leader", "worker") store locks under a different name, so
|
|
// the NOT EXISTS check won't find their lock and they fall back to the 2-hour timeout.
|
|
updateStmt := `
|
|
UPDATE cron_stats cs
|
|
SET cs.status = ?
|
|
WHERE cs.status IN (?, ?)
|
|
AND (
|
|
(cs.created_at < DATE_SUB(NOW(), INTERVAL 2 HOUR)
|
|
AND NOT EXISTS (
|
|
SELECT 1 FROM locks l
|
|
WHERE l.name = cs.name
|
|
AND l.expires_at >= CURRENT_TIMESTAMP
|
|
))
|
|
OR cs.created_at < DATE_SUB(NOW(), INTERVAL 12 HOUR)
|
|
)`
|
|
if _, err := tx.ExecContext(ctx, updateStmt, fleet.CronStatsStatusExpired, fleet.CronStatsStatusPending, fleet.CronStatsStatusQueued); err != nil {
|
|
return ctxerr.Wrap(ctx, err, "updating expired cron stats")
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|