mirror of
https://github.com/fleetdm/fleet
synced 2026-05-23 08:58:41 +00:00
Clear cron schedule errors before each run (#26775)
For #26657 This PR fixes an issue that causes cron monitoring alerts to be sent repeatedly after the first instance; that is, if a cron job fails once then the monitor reports the failure every time it runs until the server is restarted. This was due to the errors being held in the Schedule object which persists for the lifetime of the process, rather than being recreated for each run. The solution is to clear the errors from the Schedule object before each run. Added a test that fails on main and passes on this branch.
This commit is contained in:
parent
e552cd3c8e
commit
1a1d7bae78
2 changed files with 36 additions and 0 deletions
|
|
@ -461,6 +461,8 @@ func (s *Schedule) runWithStats(statsType fleet.CronStatsType) {
|
|||
|
||||
// runAllJobs runs all jobs in the schedule.
|
||||
func (s *Schedule) runAllJobs() {
|
||||
// Clear errors from the schedule before each run.
|
||||
s.errors = make(fleet.CronScheduleErrors)
|
||||
for _, job := range s.jobs {
|
||||
level.Debug(s.logger).Log("msg", "starting", "jobID", job.ID)
|
||||
if err := runJob(s.ctx, job.Fn); err != nil {
|
||||
|
|
|
|||
|
|
@ -275,6 +275,40 @@ func TestMultipleJobsInOrder(t *testing.T) {
|
|||
require.Contains(t, test_job_4_err.Error(), "oh no\n")
|
||||
}
|
||||
|
||||
func TestClearScheduleErrors(t *testing.T) {
|
||||
os.Setenv("TEST_CRON_NO_RECOVER", "0")
|
||||
defer os.Unsetenv("TEST_CRON_NO_RECOVER")
|
||||
|
||||
ctx := context.Background()
|
||||
errored := false
|
||||
|
||||
s := New(ctx, "test_schedule", "test_instance", 1000*time.Millisecond, NopLocker{}, SetUpMockStatsStore("test_schedule", fleet.CronStats{
|
||||
ID: 1,
|
||||
StatsType: fleet.CronStatsTypeScheduled,
|
||||
Name: "test_schedule_clear_errors",
|
||||
Instance: "test_instance",
|
||||
CreatedAt: time.Now().Truncate(1 * time.Second),
|
||||
UpdatedAt: time.Now().Truncate(1 * time.Second),
|
||||
Status: fleet.CronStatsStatusCompleted,
|
||||
}),
|
||||
WithJob("test_job_1", func(ctx context.Context) error {
|
||||
if !errored {
|
||||
errored = true
|
||||
return errors.New("oh well")
|
||||
}
|
||||
return nil
|
||||
}),
|
||||
)
|
||||
|
||||
// First run should return 1 error.
|
||||
s.runAllJobs()
|
||||
require.Equal(t, 1, len(s.errors))
|
||||
|
||||
// Second run should return no errors.
|
||||
s.runAllJobs()
|
||||
require.Equal(t, 0, len(s.errors))
|
||||
}
|
||||
|
||||
func TestConfigReloadCheck(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
initialSchedInterval := 1 * time.Millisecond
|
||||
|
|
|
|||
Loading…
Reference in a new issue