From 22dd392da724981927caa5d04c6e2bf690ad5c96 Mon Sep 17 00:00:00 2001 From: Victor Lyuboslavsky Date: Wed, 28 Feb 2024 10:53:37 -0600 Subject: [PATCH] Fixing code review comments. (#17240) Fixing code review comments from https://github.com/fleetdm/fleet/pull/16855 Also, moving `CleanupDistributedQueryCampaigns` back to once an hour, so that it is not disabled. --- cmd/fleet/cron.go | 26 +++++++++++++++----------- cmd/fleet/serve.go | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/cmd/fleet/cron.go b/cmd/fleet/cron.go index 2400a29598..78873db9b6 100644 --- a/cmd/fleet/cron.go +++ b/cmd/fleet/cron.go @@ -705,7 +705,6 @@ func newCleanupsAndAggregationSchedule( ctx context.Context, instanceID string, ds fleet.Datastore, - lq fleet.LiveQueryStore, logger kitlog.Logger, enrollHostLimiter fleet.EnrollHostLimiter, config *config.FleetConfig, @@ -721,6 +720,13 @@ func newCleanupsAndAggregationSchedule( schedule.WithAltLockID("leader"), schedule.WithLogger(kitlog.With(logger, "cron", name)), // Run cleanup jobs first. + schedule.WithJob( + "distributed_query_campaigns", + func(ctx context.Context) error { + _, err := ds.CleanupDistributedQueryCampaigns(ctx, time.Now().UTC()) + return err + }, + ), schedule.WithJob( "incoming_hosts", func(ctx context.Context) error { @@ -846,16 +852,16 @@ func newFrequentCleanupsSchedule( s := schedule.New( ctx, name, instanceID, defaultInterval, ds, ds, // Using leader for the lock to be backwards compatilibity with old deployments. - schedule.WithAltLockID("leader"), + schedule.WithAltLockID("leader_frequent_cleanups"), schedule.WithLogger(kitlog.With(logger, "cron", name)), // Run cleanup jobs first. schedule.WithJob( - "distributed_query_campaigns", + "redis_live_queries", func(ctx context.Context) error { - _, err := ds.CleanupDistributedQueryCampaigns(ctx, time.Now().UTC()) - if err != nil { - return err - } + // It's necessary to avoid lingering live queries in case of: + // - (Unknown) bug in the implementation, or, + // - Redis is so overloaded already that the lq.StopQuery in svc.CompleteCampaign fails to execute, or, + // - MySQL is so overloaded that ds.SaveDistributedQueryCampaign in svc.CompleteCampaign fails to execute. names, err := lq.LoadActiveQueryNames() if err != nil { return err @@ -865,10 +871,8 @@ func newFrequentCleanupsSchedule( if err != nil { return err } - if err := lq.CleanupInactiveQueries(ctx, completed); err != nil { - return err - } - return nil + err = lq.CleanupInactiveQueries(ctx, completed) + return err }, ), ) diff --git a/cmd/fleet/serve.go b/cmd/fleet/serve.go index c60f5d04c2..729cc31801 100644 --- a/cmd/fleet/serve.go +++ b/cmd/fleet/serve.go @@ -697,7 +697,7 @@ the way that the Fleet server works. commander = apple_mdm.NewMDMAppleCommander(mdmStorage, mdmPushService) } return newCleanupsAndAggregationSchedule( - ctx, instanceID, ds, liveQueryStore, logger, redisWrapperDS, &config, commander, + ctx, instanceID, ds, logger, redisWrapperDS, &config, commander, ) }, ); err != nil {