diff --git a/changes/42327-apple-profile-retries b/changes/42327-apple-profile-retries new file mode 100644 index 0000000000..fcd2a02292 --- /dev/null +++ b/changes/42327-apple-profile-retries @@ -0,0 +1 @@ +- Increased automatic retry limit for failed Apple (macOS, iOS, iPadOS) configuration profiles from 1 to 3. Windows profiles remain at 1 retry. diff --git a/server/datastore/mysql/apple_mdm_test.go b/server/datastore/mysql/apple_mdm_test.go index 8860db8229..65eebc3aec 100644 --- a/server/datastore/mysql/apple_mdm_test.go +++ b/server/datastore/mysql/apple_mdm_test.go @@ -4617,7 +4617,31 @@ func testSetVerifiedMacOSProfiles(t *testing.T, ds *Datastore) { return err }) - // after the grace period and one retry attempt, status changes to "failed" if a profile is missing (i.e. not installed) + // after the grace period and max retry attempts, status changes to "failed" if a profile is missing (i.e. not installed) + for missingRetry := range fleetmdm.MaxAppleProfileRetries { + require.NoError(t, apple_mdm.VerifyHostMDMProfiles(ctx, ds, hosts[2], profilesByIdentifier([]*fleet.HostMacOSProfile{ + { + Identifier: cp1.Identifier, + DisplayName: cp1.Name, + InstallDate: time.Now(), + }, + { + Identifier: cp2.Identifier, + DisplayName: cp2.Name, + InstallDate: time.Now(), + }, + }))) + if missingRetry == 0 { + expectedHostMDMStatus[hosts[2].ID][cp1.Identifier] = fleet.MDMDeliveryVerified // cp1 can go from pending to verified + } + expectedHostMDMStatus[hosts[2].ID][cp3.Identifier] = fleet.MDMDeliveryPending // retry for cp3 + expectedHostMDMStatus[hosts[2].ID][cp4.Identifier] = fleet.MDMDeliveryPending // retry for cp4 + checkHostMDMProfileStatuses() + // simulate retry command acknowledged by setting status to "verifying" + adHocSetVerifying(hosts[2].UUID, cp3.Identifier) + adHocSetVerifying(hosts[2].UUID, cp4.Identifier) + } + // report osquery results again with cp3 and cp4 still missing after max retries require.NoError(t, apple_mdm.VerifyHostMDMProfiles(ctx, ds, hosts[2], profilesByIdentifier([]*fleet.HostMacOSProfile{ { Identifier: cp1.Identifier, @@ -4630,32 +4654,31 @@ func testSetVerifiedMacOSProfiles(t *testing.T, ds *Datastore) { InstallDate: time.Now(), }, }))) - expectedHostMDMStatus[hosts[2].ID][cp1.Identifier] = fleet.MDMDeliveryVerified // cp1 can go from pending to verified - expectedHostMDMStatus[hosts[2].ID][cp3.Identifier] = fleet.MDMDeliveryPending // first retry for cp3 - expectedHostMDMStatus[hosts[2].ID][cp4.Identifier] = fleet.MDMDeliveryPending // first retry for cp4 - checkHostMDMProfileStatuses() - // simulate retry command acknowledged by setting status to "verifying" - adHocSetVerifying(hosts[2].UUID, cp3.Identifier) - adHocSetVerifying(hosts[2].UUID, cp4.Identifier) - // report osquery results again with cp3 and cp4 still missing - require.NoError(t, apple_mdm.VerifyHostMDMProfiles(ctx, ds, hosts[2], profilesByIdentifier([]*fleet.HostMacOSProfile{ - { - Identifier: cp1.Identifier, - DisplayName: cp1.Name, - InstallDate: time.Now(), - }, - { - Identifier: cp2.Identifier, - DisplayName: cp2.Name, - InstallDate: time.Now(), - }, - }))) - expectedHostMDMStatus[hosts[2].ID][cp3.Identifier] = fleet.MDMDeliveryFailed // still missing after retry so expect cp3 to fail - expectedHostMDMStatus[hosts[2].ID][cp4.Identifier] = fleet.MDMDeliveryFailed // still missing after retry so expect cp4 to fail + expectedHostMDMStatus[hosts[2].ID][cp3.Identifier] = fleet.MDMDeliveryFailed // still missing after max retries so expect cp3 to fail + expectedHostMDMStatus[hosts[2].ID][cp4.Identifier] = fleet.MDMDeliveryFailed // still missing after max retries so expect cp4 to fail checkHostMDMProfileStatuses() - // after the grace period and one retry attempt, status changes to "failed" if a profile is outdated (i.e. installed + // after the grace period and max retry attempts, status changes to "failed" if a profile is outdated (i.e. installed // before the updated at timestamp of the profile) + for range fleetmdm.MaxAppleProfileRetries { + require.NoError(t, apple_mdm.VerifyHostMDMProfiles(ctx, ds, hosts[2], profilesByIdentifier([]*fleet.HostMacOSProfile{ + { + Identifier: cp1.Identifier, + DisplayName: cp1.Name, + InstallDate: time.Now(), + }, + { + Identifier: cp2.Identifier, + DisplayName: cp2.Name, + InstallDate: time.Now().Add(-48 * time.Hour), + }, + }))) + expectedHostMDMStatus[hosts[2].ID][cp2.Identifier] = fleet.MDMDeliveryPending // retry for cp2 + checkHostMDMProfileStatuses() + // simulate retry command acknowledged by setting status to "verifying" + adHocSetVerifying(hosts[2].UUID, cp2.Identifier) + } + // report osquery results again with cp2 still outdated after max retries require.NoError(t, apple_mdm.VerifyHostMDMProfiles(ctx, ds, hosts[2], profilesByIdentifier([]*fleet.HostMacOSProfile{ { Identifier: cp1.Identifier, @@ -4668,24 +4691,7 @@ func testSetVerifiedMacOSProfiles(t *testing.T, ds *Datastore) { InstallDate: time.Now().Add(-48 * time.Hour), }, }))) - expectedHostMDMStatus[hosts[2].ID][cp2.Identifier] = fleet.MDMDeliveryPending // first retry for cp2 - checkHostMDMProfileStatuses() - // simulate retry command acknowledged by setting status to "verifying" - adHocSetVerifying(hosts[2].UUID, cp2.Identifier) - // report osquery results again with cp2 still outdated - require.NoError(t, apple_mdm.VerifyHostMDMProfiles(ctx, ds, hosts[2], profilesByIdentifier([]*fleet.HostMacOSProfile{ - { - Identifier: cp1.Identifier, - DisplayName: cp1.Name, - InstallDate: time.Now(), - }, - { - Identifier: cp2.Identifier, - DisplayName: cp2.Name, - InstallDate: time.Now().Add(-48 * time.Hour), - }, - }))) - expectedHostMDMStatus[hosts[2].ID][cp2.Identifier] = fleet.MDMDeliveryFailed // still outdated after retry so expect cp2 to fail + expectedHostMDMStatus[hosts[2].ID][cp2.Identifier] = fleet.MDMDeliveryFailed // still outdated after max retries so expect cp2 to fail checkHostMDMProfileStatuses() } @@ -6266,7 +6272,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { t.Run("MissingProfileWithRetry", func(t *testing.T) { defer cleanupProfiles(t) // missing profile, verifying and verified statuses should change to failed after the grace - // period and one retry + // period and max retries cases := []testCase{ { name: "PendingThenMissing", @@ -6316,11 +6322,18 @@ func TestMDMAppleProfileVerification(t *testing.T) { } if tc.initialStatus != fleet.MDMDeliveryPending { - // after retry, assume successful install profile command so status should be verifying + // simulate retry cycles up to max retries + for retry := uint(1); retry < fleetmdm.MaxAppleProfileRetries; retry++ { + // after retry, assume successful install profile command so status should be verifying + upsertHostCPs([]*fleet.Host{h}, []*fleet.MDMAppleConfigProfile{cp}, fleet.MDMOperationTypeInstall, &fleet.MDMDeliveryVerifying, ctx, ds, t) + // report osquery results with profile still missing + require.NoError(t, apple_mdm.VerifyHostMDMProfiles(ctx, ds, h, profilesByIdentifier(reportedProfiles))) + // still retrying so status should be pending + require.NoError(t, checkHostStatus(t, h, fleet.MDMDeliveryPending, ""), tc.name) + } + // final retry: after max retries, expect failure upsertHostCPs([]*fleet.Host{h}, []*fleet.MDMAppleConfigProfile{cp}, fleet.MDMOperationTypeInstall, &fleet.MDMDeliveryVerifying, ctx, ds, t) - // report osquery results require.NoError(t, apple_mdm.VerifyHostMDMProfiles(ctx, ds, h, profilesByIdentifier(reportedProfiles))) - // now we see the expected status require.NoError(t, checkHostStatus(t, h, tc.expectedStatus, string(fleet.HostMDMProfileDetailFailedWasVerifying)), tc.name) // grace period expired, max retries so check expected status } } @@ -6373,7 +6386,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { } // initialize with no remaining retries - initializeProfile(t, h, cp, tc.initialStatus, 1) + initializeProfile(t, h, cp, tc.initialStatus, fleetmdm.MaxAppleProfileRetries) // within grace period setProfileUploadedAt(t, cp, twoMinutesAgo) @@ -6381,7 +6394,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { require.NoError(t, checkHostStatus(t, h, tc.initialStatus, "")) // outdated profiles are treated similar to missing profiles so status doesn't change if within grace period // reinitalize with no remaining retries - initializeProfile(t, h, cp, tc.initialStatus, 1) + initializeProfile(t, h, cp, tc.initialStatus, fleetmdm.MaxAppleProfileRetries) // outside grace period setProfileUploadedAt(t, cp, twoHoursAgo) @@ -6436,7 +6449,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { } // initialize with no remaining retries - initializeProfile(t, h, cp, tc.initialStatus, 1) + initializeProfile(t, h, cp, tc.initialStatus, fleetmdm.MaxAppleProfileRetries) // within grace period setProfileUploadedAt(t, cp, twoMinutesAgo) @@ -6444,7 +6457,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { require.NoError(t, checkHostStatus(t, h, tc.expectedStatus, tc.expectedDetail)) // if found within grace period, verifying status can become verified so check expected status // reinitializewith no remaining retries - initializeProfile(t, h, cp, tc.initialStatus, 1) + initializeProfile(t, h, cp, tc.initialStatus, fleetmdm.MaxAppleProfileRetries) // outside grace period setProfileUploadedAt(t, cp, twoHoursAgo) @@ -6504,7 +6517,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { } // initialize with no remaining retries - initializeProfile(t, h, cp, tc.initialStatus, 1) + initializeProfile(t, h, cp, tc.initialStatus, fleetmdm.MaxAppleProfileRetries) // within grace period setProfileUploadedAt(t, cp, twoMinutesAgo) @@ -6512,7 +6525,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { require.NoError(t, checkHostStatus(t, h, tc.expectedStatus, tc.expectedDetail)) // if found within grace period, verifying status can become verified so check expected status // reinitialize with no remaining retries - initializeProfile(t, h, cp, tc.initialStatus, 1) + initializeProfile(t, h, cp, tc.initialStatus, fleetmdm.MaxAppleProfileRetries) // outside grace period setProfileUploadedAt(t, cp, twoHoursAgo) @@ -6547,7 +6560,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { initialStatus := fleet.MDMDeliveryVerifying // initialize with no remaining retries - initializeProfile(t, h, stored0, initialStatus, 1) + initializeProfile(t, h, stored0, initialStatus, fleetmdm.MaxAppleProfileRetries) // within grace period setProfileUploadedAt(t, stored0, twoMinutesAgo) // host is out of date but still within grace period @@ -6555,7 +6568,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { require.NoError(t, checkHostStatus(t, h, fleet.MDMDeliveryVerifying, "")) // no change // reinitialize with no remaining retries - initializeProfile(t, h, stored0, initialStatus, 1) + initializeProfile(t, h, stored0, initialStatus, fleetmdm.MaxAppleProfileRetries) // outside grace period setProfileUploadedAt(t, stored0, twoHoursAgo) // host is out of date and grace period has passed @@ -6563,7 +6576,7 @@ func TestMDMAppleProfileVerification(t *testing.T) { require.NoError(t, checkHostStatus(t, h, fleet.MDMDeliveryFailed, string(fleet.HostMDMProfileDetailFailedWasVerifying))) // set to failed // reinitialize with no remaining retries - initializeProfile(t, h, stored0, initialStatus, 1) + initializeProfile(t, h, stored0, initialStatus, fleetmdm.MaxAppleProfileRetries) // save a copy of the config profile to team 1 cp.TeamID = ptr.Uint(1) diff --git a/server/datastore/mysql/microsoft_mdm.go b/server/datastore/mysql/microsoft_mdm.go index 636f664cd3..68f5e87c50 100644 --- a/server/datastore/mysql/microsoft_mdm.go +++ b/server/datastore/mysql/microsoft_mdm.go @@ -534,7 +534,7 @@ func updateMDMWindowsHostProfileStatusFromResponseDB( for _, hp := range matchingHostProfiles { payload := uuidsToPayloads[hp.CommandUUID] if payload.Status != nil && *payload.Status == fleet.MDMDeliveryFailed { - if hp.Retries < mdm.MaxProfileRetries { + if hp.Retries < mdm.MaxWindowsProfileRetries { // if we haven't hit the max retries, we set // the host profile status to nil (which causes // an install profile command to be enqueued diff --git a/server/fleet/mdm.go b/server/fleet/mdm.go index bc16f0c6bc..ead4b67d63 100644 --- a/server/fleet/mdm.go +++ b/server/fleet/mdm.go @@ -492,7 +492,7 @@ type MDMDeliveryStatus string // command failed to enqueue in ReconcileProfile (it resets the status to // NULL). A failure in the asynchronous actual response of the MDM command // (via MDMAppleCheckinAndCommandService.CommandAndReportResults) results in -// a retry of mdm.MaxProfileRetries times and if it still reports as failed +// a retry of mdm.MaxAppleProfileRetries times (or mdm.MaxWindowsProfileRetries for Windows) and if it still reports as failed // it will be set to failed permanently. // // - verified: the MDM command was successfully applied, and Fleet has diff --git a/server/mdm/apple/profile_verifier.go b/server/mdm/apple/profile_verifier.go index 0175aac103..7029bdd653 100644 --- a/server/mdm/apple/profile_verifier.go +++ b/server/mdm/apple/profile_verifier.go @@ -92,7 +92,7 @@ func VerifyHostMDMProfiles(ctx context.Context, ds fleet.ProfileVerificationStor retriesByProfileIdentifier[r.ProfileIdentifier] = r.Retries } for _, key := range missing { - if retriesByProfileIdentifier[key] < mdm.MaxProfileRetries { + if retriesByProfileIdentifier[key] < mdm.MaxAppleProfileRetries { // if we haven't hit the max retries, we set the host profile status to nil (which // causes an install profile command to be enqueued the next time the profile // manager cron runs) and increment the retry count @@ -125,7 +125,7 @@ func HandleHostMDMProfileInstallResult(ctx context.Context, ds fleet.ProfileVeri return err } - if m.Retries < mdm.MaxProfileRetries { + if m.Retries < mdm.MaxAppleProfileRetries { // if we haven't hit the max retries, we set the host profile status to nil (which // causes an install profile command to be enqueued the next time the profile // manager cron runs) and increment the retry count diff --git a/server/mdm/apple/profile_verifier_test.go b/server/mdm/apple/profile_verifier_test.go index 04d0a5a66e..f78f5b10bc 100644 --- a/server/mdm/apple/profile_verifier_test.go +++ b/server/mdm/apple/profile_verifier_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/fleetdm/fleet/v4/server/fleet" + "github.com/fleetdm/fleet/v4/server/mdm" "github.com/fleetdm/fleet/v4/server/mock" "github.com/stretchr/testify/require" ) @@ -53,7 +54,7 @@ func TestVerifyHostMDMProfiles(t *testing.T) { expectedProfiles: map[string]*fleet.ExpectedMDMProfile{"profile1": {}, "profile2": {}}, retryCounts: []fleet.HostMDMProfileRetryCount{ {ProfileIdentifier: "profile1", Retries: 0}, - {ProfileIdentifier: "profile2", Retries: 1}, + {ProfileIdentifier: "profile2", Retries: mdm.MaxAppleProfileRetries}, }, installed: map[string]*fleet.HostMacOSProfile{}, toRetry: []string{"profile1"}, @@ -78,7 +79,7 @@ func TestVerifyHostMDMProfiles(t *testing.T) { "profile1": {InstallDate: time.Now().Add(-24 * time.Hour)}, }, retryCounts: []fleet.HostMDMProfileRetryCount{ - {ProfileIdentifier: "profile1", Retries: 1}, + {ProfileIdentifier: "profile1", Retries: mdm.MaxAppleProfileRetries}, }, toFail: []string{"profile1"}, }, diff --git a/server/mdm/mdm.go b/server/mdm/mdm.go index 1d7ca592c6..bd5ae0276b 100644 --- a/server/mdm/mdm.go +++ b/server/mdm/mdm.go @@ -17,10 +17,13 @@ import ( "github.com/smallstep/pkcs7" ) -// MaxProfileRetries is the maximum times an install profile command may be -// retried, after which marked as failed and no further attempts will be made -// to install the profile. -const MaxProfileRetries = 1 +// MaxAppleProfileRetries is the maximum number of times an Apple install profile command may be retried after the +// initial attempt, after which it is marked as failed and no further attempts will be made to install the profile. +const MaxAppleProfileRetries = 3 + +// MaxWindowsProfileRetries is the maximum number of times a Windows install profile command may be retried after the +// initial attempt, after which it is marked as failed and no further attempts will be made to install the profile. +const MaxWindowsProfileRetries = 1 // DecryptBase64CMS decrypts a base64 encoded pkcs7-encrypted value using the // provided certificate and private key. diff --git a/server/service/apple_mdm_test.go b/server/service/apple_mdm_test.go index 659a7f7ab3..08a5eb6829 100644 --- a/server/service/apple_mdm_test.go +++ b/server/service/apple_mdm_test.go @@ -2031,7 +2031,7 @@ func TestMDMCommandAndReportResultsProfileHandling(t *testing.T) { errors: []mdm.ErrorChain{ {ErrorCode: 123, ErrorDomain: "testDomain", USEnglishDescription: "testMessage"}, }, - prevRetries: 1, // expect to fail + prevRetries: fleetmdm.MaxAppleProfileRetries, // expect to fail want: &fleet.HostMDMAppleProfile{ Status: &fleet.MDMDeliveryFailed, Detail: "testDomain (123): testMessage\n", @@ -2105,7 +2105,7 @@ func TestMDMCommandAndReportResultsProfileHandling(t *testing.T) { if c.requestType == "InstallProfile" && c.status == "Error" { shouldCheckCount = true } - if shouldCheckCount && c.prevRetries == uint(0) { + if shouldCheckCount && c.prevRetries < fleetmdm.MaxAppleProfileRetries { shouldRetry = true } if c.requestType == "RemoveProfile" || (c.requestType == "InstallProfile" && !shouldRetry) { diff --git a/server/service/integration_certificate_authorities_test.go b/server/service/integration_certificate_authorities_test.go index 3a2842814e..54999e0f6b 100644 --- a/server/service/integration_certificate_authorities_test.go +++ b/server/service/integration_certificate_authorities_test.go @@ -17,6 +17,7 @@ import ( "github.com/fleetdm/fleet/v4/ee/server/service/scep" "github.com/fleetdm/fleet/v4/server/datastore/mysql" "github.com/fleetdm/fleet/v4/server/fleet" + servermdm "github.com/fleetdm/fleet/v4/server/mdm" apple_mdm "github.com/fleetdm/fleet/v4/server/mdm/apple" "github.com/fleetdm/fleet/v4/server/mdm/apple/mobileconfig" "github.com/fleetdm/fleet/v4/server/mdm/nanomdm/mdm" @@ -1773,36 +1774,68 @@ func (s *integrationMDMTestSuite) TestSCEPChallengeExpirationRetriesSmallStep() require.Len(t, gotHostProfs, 1) require.Equal(t, expectHostProf, gotHostProfs[0]) - // simulate another failure during SCEP protocol, this time it won't be retried because normal retry limit is 1 + // simulate additional failures during SCEP protocol until retry limit is exhausted. + // Each error when retries < MaxAppleProfileRetries triggers a retry (retries++, status=NULL). + // When retries == MaxAppleProfileRetries, the next error marks as failed. + expectedChallengeCount := int64(3) // starts at 3 from above + for retries := 1; retries < servermdm.MaxAppleProfileRetries; retries++ { + // error triggers retry + cmd, err = mdmDevice.Err(prevCommandUUID, []mdm.ErrorChain{}) + require.NoError(t, err) + require.Nil(t, cmd) + + expectHostProf.CommandUUID = prevCommandUUID + expectHostProf.Status = nil + expectHostProf.Retries = retries + 1 + + gotHostProfs = listHostProfilesDB(host.UUID) + require.Len(t, gotHostProfs, 1) + require.Equal(t, expectHostProf, gotHostProfs[0]) + + // cron resends with new challenge + s.awaitTriggerProfileSchedule(t) + expectedChallengeCount++ + require.Equal(t, expectedChallengeCount, challengeCounter.Load()) + + cmd, err = mdmDevice.Idle() + require.NoError(t, err) + require.NotNil(t, cmd) + require.NotEqual(t, prevCommandUUID, cmd.CommandUUID) + prevCommandUUID = cmd.CommandUUID + require.Equal(t, "InstallProfile", cmd.Command.RequestType) + + expectHostProf.CommandUUID = cmd.CommandUUID + expectHostProf.Status = ptr.String("pending") + + gotHostProfs = listHostProfilesDB(host.UUID) + require.Len(t, gotHostProfs, 1) + require.Equal(t, expectHostProf, gotHostProfs[0]) + } + + // final error: retries == MaxAppleProfileRetries, should mark as failed cmd, err = mdmDevice.Err(prevCommandUUID, []mdm.ErrorChain{}) - require.NoError(t, err) // error report accepted by server - require.Nil(t, cmd) // no new command + require.NoError(t, err) + require.Nil(t, cmd) - // update expectations for host profile DB state after failure - expectHostProf.CommandUUID = prevCommandUUID // unchanged - expectHostProf.Status = ptr.String("failed") // should now be failed - expectHostProf.Retries = 1 // unchanged + expectHostProf.CommandUUID = prevCommandUUID + expectHostProf.Status = ptr.String("failed") + expectHostProf.Retries = servermdm.MaxAppleProfileRetries - // check DB state after failure gotHostProfs = listHostProfilesDB(host.UUID) require.Len(t, gotHostProfs, 1) require.Equal(t, expectHostProf, gotHostProfs[0]) - // MDM checkin should not expect new command cmd, err = mdmDevice.Idle() require.NoError(t, err) require.Nil(t, cmd) - // trigger another profile sync, which should not resend SCEP profile s.awaitTriggerProfileSchedule(t) - require.Equal(t, int64(3), challengeCounter.Load()) // challenge endpoint not called again because no retry should be attempted + require.Equal(t, expectedChallengeCount, challengeCounter.Load()) - // MDM checkin should not expect new command cmd, err = mdmDevice.Idle() require.NoError(t, err) require.Nil(t, cmd) - // check DB state to confirm no changes gotHostProfs = listHostProfilesDB(host.UUID) require.Len(t, gotHostProfs, 1) require.Equal(t, expectHostProf, gotHostProfs[0]) @@ -1810,7 +1843,7 @@ func (s *integrationMDMTestSuite) TestSCEPChallengeExpirationRetriesSmallStep() // manually resend the profile installation, which ignores retry limit // FIXME: manual resend doesn't change retries, but maybe it should reset to 0 _ = s.Do("POST", fmt.Sprintf("/api/v1/fleet/hosts/%d/configuration_profiles/%s/resend", host.ID, profUUID), nil, http.StatusAccepted) - require.Equal(t, int64(3), challengeCounter.Load()) // challenge endpoint not called until reconcilation runs + require.Equal(t, expectedChallengeCount, challengeCounter.Load()) // challenge endpoint not called until reconcilation runs // MDM checkin should not expect new command until reconciliation runs cmd, err = mdmDevice.Idle() @@ -1818,9 +1851,9 @@ func (s *integrationMDMTestSuite) TestSCEPChallengeExpirationRetriesSmallStep() require.Nil(t, cmd) // no new command should be issued yet // update expectations for host profile DB state after manual resend - expectHostProf.Status = nil // status should be cleared to allow retry - expectHostProf.Retries = 1 // unchanged for manual resend - expectHostProf.CommandUUID = prevCommandUUID // unchanged until reconcilation runs + expectHostProf.Status = nil // status should be cleared to allow retry + expectHostProf.Retries = servermdm.MaxAppleProfileRetries // unchanged for manual resend + expectHostProf.CommandUUID = prevCommandUUID // unchanged until reconcilation runs // check DB state after manual resend request gotHostProfs = listHostProfilesDB(host.UUID) @@ -1828,9 +1861,10 @@ func (s *integrationMDMTestSuite) TestSCEPChallengeExpirationRetriesSmallStep() require.Equal(t, expectHostProf, gotHostProfs[0]) // trigger another profile sync, which should resend SCEP profile - require.Equal(t, int64(3), challengeCounter.Load()) // challenge endpoint not called until reconcilation runs + require.Equal(t, expectedChallengeCount, challengeCounter.Load()) // challenge endpoint not called until reconcilation runs s.awaitTriggerProfileSchedule(t) - require.Equal(t, int64(4), challengeCounter.Load()) // challenge endpoint called again during host profile reconciliation + expectedChallengeCount++ + require.Equal(t, expectedChallengeCount, challengeCounter.Load()) // challenge endpoint called again during host profile reconciliation // MDM checkin should expect InstallProfile command with SCEP profile with new challenge cmd, err = mdmDevice.Idle() @@ -1842,9 +1876,9 @@ func (s *integrationMDMTestSuite) TestSCEPChallengeExpirationRetriesSmallStep() require.Equal(t, expectPayloadWithChallenge(), parseCommandPayload(cmd)) // challenge value should be updated // update expectations for host profile DB state - expectHostProf.CommandUUID = cmd.CommandUUID // should be updated to new command UUID - expectHostProf.Status = ptr.String("pending") // should now be pending again - expectHostProf.Retries = 1 // unchanged for manual resend + expectHostProf.CommandUUID = cmd.CommandUUID // should be updated to new command UUID + expectHostProf.Status = ptr.String("pending") // should now be pending again + expectHostProf.Retries = servermdm.MaxAppleProfileRetries // unchanged for manual resend // check DB state gotHostProfs = listHostProfilesDB(host.UUID) @@ -1888,9 +1922,10 @@ func (s *integrationMDMTestSuite) TestSCEPChallengeExpirationRetriesSmallStep() require.Nil(t, cmd) // trigger another profile sync, which should resend the SCEP profile installation - require.Equal(t, int64(4), challengeCounter.Load()) // challenge endpoint not called until reconcilation runs + require.Equal(t, expectedChallengeCount, challengeCounter.Load()) // challenge endpoint not called until reconcilation runs s.awaitTriggerProfileSchedule(t) - require.Equal(t, int64(5), challengeCounter.Load()) // challenge endpoint called with host profile reconciliation + expectedChallengeCount++ + require.Equal(t, expectedChallengeCount, challengeCounter.Load()) // challenge endpoint called with host profile reconciliation // MDM checkin should expect InstallProfile command with SCEP profile with new challenge cmd, err = mdmDevice.Idle() diff --git a/server/service/integration_mdm_profiles_test.go b/server/service/integration_mdm_profiles_test.go index 09c59321bb..cc6f21f939 100644 --- a/server/service/integration_mdm_profiles_test.go +++ b/server/service/integration_mdm_profiles_test.go @@ -735,28 +735,32 @@ func (s *integrationMDMTestSuite) TestAppleProfileRetries() { }) t.Run("retry after verification", func(t *testing.T) { - // report osquery results with I1 missing and confirm that the I1 marked as pending (initial retry) - reportHostProfs(t, "I2", mobileconfig.FleetdConfigPayloadIdentifier) - expectedProfileStatuses["I1"] = fleet.MDMDeliveryPending - checkProfilesStatus(t) - expectedRetryCounts["I1"] = 1 - checkRetryCounts(t) + // I1 already has retries=1 from the previous subtest, continue retrying until max retries exceeded + startRetries := expectedRetryCounts["I1"] + for retryNum := startRetries + 1; retryNum <= servermdm.MaxAppleProfileRetries; retryNum++ { + reportHostProfs(t, "I2", mobileconfig.FleetdConfigPayloadIdentifier) + expectedRetryCounts["I1"] = retryNum + // not yet at max retries, profile should be pending for retry + expectedProfileStatuses["I1"] = fleet.MDMDeliveryPending + checkProfilesStatus(t) + checkRetryCounts(t) - // trigger a profile sync and confirm that the install profile command for I1 was resent - s.awaitTriggerProfileSchedule(t) - installs, removes := checkNextPayloads(t, mdmDevice, false) - s.signedProfilesMatch([][]byte{initialExpectedProfiles[0]}, installs) - require.Empty(t, removes) + // trigger a profile sync and confirm that the install profile command for I1 was resent + s.awaitTriggerProfileSchedule(t) + installs, removes := checkNextPayloads(t, mdmDevice, false) + s.signedProfilesMatch([][]byte{initialExpectedProfiles[0]}, installs) + require.Empty(t, removes) + } - // report osquery results with I1 missing again and confirm that the I1 marked as failed (max retries exceeded) + // report osquery results with I1 missing again, now max retries exceeded reportHostProfs(t, "I2", mobileconfig.FleetdConfigPayloadIdentifier) expectedProfileStatuses["I1"] = fleet.MDMDeliveryFailed checkProfilesStatus(t) - checkRetryCounts(t) // unchanged + checkRetryCounts(t) // unchanged, still at max // trigger a profile sync and confirm that the install profile command for I1 was not resent s.awaitTriggerProfileSchedule(t) - installs, removes = checkNextPayloads(t, mdmDevice, false) + installs, removes := checkNextPayloads(t, mdmDevice, false) require.Empty(t, installs) require.Empty(t, removes) }) @@ -779,8 +783,19 @@ func (s *integrationMDMTestSuite) TestAppleProfileRetries() { expectedRetryCounts["I3"] = 1 checkRetryCounts(t) - // trigger a profile sync and confirm that the install profile command for I3 was sent and - // simulate a device ack + // continue retrying via device errors until max retries exceeded + for retryNum := uint(2); retryNum <= servermdm.MaxAppleProfileRetries; retryNum++ { + s.awaitTriggerProfileSchedule(t) + installs, removes = checkNextPayloads(t, mdmDevice, true) // simulate device error + s.signedProfilesMatch([][]byte{newProfile}, installs) + require.Empty(t, removes) + expectedProfileStatuses["I3"] = fleet.MDMDeliveryPending + expectedRetryCounts["I3"] = retryNum + checkProfilesStatus(t) + checkRetryCounts(t) + } + + // trigger a profile sync and simulate a device ack (retries already at max) s.awaitTriggerProfileSchedule(t) installs, removes = checkNextPayloads(t, mdmDevice, false) s.signedProfilesMatch([][]byte{newProfile}, installs) @@ -789,8 +804,7 @@ func (s *integrationMDMTestSuite) TestAppleProfileRetries() { checkProfilesStatus(t) checkRetryCounts(t) // unchanged - // report osquery results with I3 missing and confirm that the I3 marked as failed (max - // retries exceeded) + // report osquery results with I3 missing and confirm that I3 is marked as failed (max retries exceeded) reportHostProfs(t, "I2", mobileconfig.FleetdConfigPayloadIdentifier) expectedProfileStatuses["I3"] = fleet.MDMDeliveryFailed checkProfilesStatus(t) @@ -810,28 +824,28 @@ func (s *integrationMDMTestSuite) TestAppleProfileRetries() { s.Do("POST", "/api/v1/fleet/mdm/apple/profiles/batch", batchSetMDMAppleProfilesRequest{Profiles: testProfiles}, http.StatusNoContent) setProfileUploadedAt(t, time.Now().Add(-48*time.Hour), "I1", "I2", mobileconfig.FleetdConfigPayloadIdentifier, "I3", "I4") - // trigger a profile sync and confirm that the install profile command for I3 was sent and - // simulate a device error + // repeatedly simulate device errors until max retries exceeded + for retryNum := uint(1); retryNum <= servermdm.MaxAppleProfileRetries; retryNum++ { + s.awaitTriggerProfileSchedule(t) + installs, removes := checkNextPayloads(t, mdmDevice, true) + s.signedProfilesMatch([][]byte{newProfile}, installs) + require.Empty(t, removes) + expectedProfileStatuses["I4"] = fleet.MDMDeliveryPending + expectedRetryCounts["I4"] = retryNum + checkProfilesStatus(t) + checkRetryCounts(t) + } + + // one more device error should mark as failed s.awaitTriggerProfileSchedule(t) installs, removes := checkNextPayloads(t, mdmDevice, true) s.signedProfilesMatch([][]byte{newProfile}, installs) require.Empty(t, removes) - expectedProfileStatuses["I4"] = fleet.MDMDeliveryPending - checkProfilesStatus(t) - expectedRetryCounts["I4"] = 1 - checkRetryCounts(t) - - // trigger a profile sync and confirm that the install profile command for I4 was sent and - // simulate a second device error - s.awaitTriggerProfileSchedule(t) - installs, removes = checkNextPayloads(t, mdmDevice, true) - s.signedProfilesMatch([][]byte{newProfile}, installs) - require.Empty(t, removes) expectedProfileStatuses["I4"] = fleet.MDMDeliveryFailed checkProfilesStatus(t) checkRetryCounts(t) // unchanged - // trigger a profile sync and confirm that the install profile command for I3 was not resent + // trigger a profile sync and confirm that the install profile command for I4 was not resent s.awaitTriggerProfileSchedule(t) installs, removes = checkNextPayloads(t, mdmDevice, false) require.Empty(t, installs) @@ -879,8 +893,23 @@ func (s *integrationMDMTestSuite) TestAppleProfileRetries() { require.Empty(t, installs) require.Empty(t, removes) - // report osquery results again, this time I5 is missing and confirm that the I5 marked as - // failed (max retries exceeded) + // report osquery results again with I5 missing, retry until max retries exceeded. + // Each iteration: report missing (increments retries, sets status=NULL) -> cron re-enqueues -> ack (verifying). + // When retries reaches MaxAppleProfileRetries, the next missing report marks it as failed. + for expectedRetryCounts["I5"] < servermdm.MaxAppleProfileRetries { + reportHostProfs(t, "I2", mobileconfig.FleetdConfigPayloadIdentifier) + expectedRetryCounts["I5"]++ + expectedProfileStatuses["I5"] = fleet.MDMDeliveryPending + checkProfilesStatus(t) + checkRetryCounts(t) + + s.awaitTriggerProfileSchedule(t) + installs, removes = checkNextPayloads(t, mdmDevice, false) + s.signedProfilesMatch([][]byte{newProfile}, installs) + require.Empty(t, removes) + } + + // one final missing report: retries == MaxAppleProfileRetries, should be failed reportHostProfs(t, "I2", mobileconfig.FleetdConfigPayloadIdentifier) expectedProfileStatuses["I5"] = fleet.MDMDeliveryFailed checkProfilesStatus(t) @@ -984,7 +1013,7 @@ func (s *integrationMDMTestSuite) TestWindowsProfileRetries() { checkRetryCounts(t) // no retries }) - retriesBeforeFailure := servermdm.MaxProfileRetries + retriesBeforeFailure := servermdm.MaxWindowsProfileRetries t.Run(fmt.Sprintf("retries %d time before marking as failed", retriesBeforeFailure), func(t *testing.T) { s.Do("POST", "/api/v1/fleet/mdm/profiles/batch", batchSetMDMProfilesRequest{Profiles: testProfiles}, http.StatusNoContent) mysql.ExecAdhocSQL(t, s.ds, func(q sqlx.ExtContext) error { @@ -1114,7 +1143,7 @@ func (s *integrationMDMTestSuite) TestWindowsProfileRetries() { expectedProfileStatuses["N1"] = fleet.MDMDeliveryFailed expectedProfileStatuses["N2"] = fleet.MDMDeliveryVerified checkProfilesStatus(t) - expectedRetryCounts["N1"] = servermdm.MaxProfileRetries + expectedRetryCounts["N1"] = servermdm.MaxWindowsProfileRetries checkRetryCounts(t) }) } @@ -3983,7 +4012,7 @@ func (s *integrationMDMTestSuite) TestWindowsProfileManagement() { }) wantDeliveryStatus := fleet.WindowsResponseToDeliveryStatus(wantStatus) - if gotProfile.Retries <= servermdm.MaxProfileRetries && wantDeliveryStatus == fleet.MDMDeliveryFailed { + if gotProfile.Retries <= servermdm.MaxWindowsProfileRetries && wantDeliveryStatus == fleet.MDMDeliveryFailed { require.EqualValues(t, "pending", gotProfile.Status, "command_uuid", cmd.Cmd.CmdID.Value) } else { require.EqualValues(t, wantDeliveryStatus, gotProfile.Status, "command_uuid", cmd.Cmd.CmdID.Value) @@ -7305,10 +7334,15 @@ func (s *integrationMDMTestSuite) TestVerifyUserScopedProfiles() { s.awaitTriggerProfileSchedule(t) - // force-set it to Verifying so that by being missing again it goes to failed + // force-set it to Verifying and set retries to max-1 so that by being missing again it goes to failed // (it doesn't go to failed if it is pending) forceSetAppleHostProfileStatus(t, s.ds, host.UUID, test.ToMDMAppleConfigProfile(profNameToPayload["A3"]), fleet.MDMOperationTypeInstall, fleet.MDMDeliveryVerifying) + mysql.ExecAdhocSQL(t, s.ds, func(q sqlx.ExtContext) error { + _, err := q.ExecContext(ctx, `UPDATE host_mdm_apple_profiles SET retries = ? WHERE host_uuid = ? AND profile_identifier = ?`, + servermdm.MaxAppleProfileRetries, host.UUID, profNameToPayload["A3"].Identifier) + return err + }) err = apple_mdm.VerifyHostMDMProfiles(ctx, s.ds, host, map[string]*fleet.HostMacOSProfile{ profNameToPayload["A1"].Identifier: { @@ -7349,7 +7383,7 @@ func (s *integrationMDMTestSuite) TestVerifyUserScopedProfiles() { ProfileName: profNameToPayload["A3"].Name, Status: ptr.String(string(fleet.MDMDeliveryFailed)), OperationType: ptr.String(string(fleet.MDMOperationTypeInstall)), - Retries: 1, + Retries: servermdm.MaxAppleProfileRetries, Scope: string(fleet.PayloadScopeUser), }, }) diff --git a/server/service/integration_mdm_test.go b/server/service/integration_mdm_test.go index 01b780ccf0..2423f791fe 100644 --- a/server/service/integration_mdm_test.go +++ b/server/service/integration_mdm_test.go @@ -11536,12 +11536,21 @@ func (s *integrationMDMTestSuite) TestRemoveFailedProfiles() { "I1": {Identifier: "I1", DisplayName: "I1", InstallDate: time.Now()}, })) - // Do another trigger + command fetching cycle, since we retry when a profile fails on install. + // Do additional trigger + command fetching cycles until max retries are exhausted. + // Each cycle: cron re-enqueues the profile, device responds with error, HandleHostMDMProfileInstallResult increments retries. + for range servermdm.MaxAppleProfileRetries - 1 { + s.awaitTriggerProfileSchedule(t) + mdmDeviceRespond(mdmDevice) + } + + // One more cycle: cron re-enqueues, device responds with error. Since retries == MaxAppleProfileRetries, + // HandleHostMDMProfileInstallResult marks the profile as failed. s.awaitTriggerProfileSchedule(t) mdmDeviceRespond(mdmDevice) + // Verify the non-failing profiles via osquery require.NoError(t, apple_mdm.VerifyHostMDMProfiles(context.Background(), s.ds, host, map[string]*fleet.HostMacOSProfile{ - "I1": {Identifier: "I1", DisplayName: "I1", InstallDate: time.Now()}, + "I2": {Identifier: "I2", DisplayName: "I2", InstallDate: time.Now()}, mobileconfig.FleetdConfigPayloadIdentifier: {Identifier: mobileconfig.FleetdConfigPayloadIdentifier, DisplayName: "dn1", InstallDate: time.Now()}, mobileconfig.FleetCARootConfigPayloadIdentifier: {Identifier: mobileconfig.FleetCARootConfigPayloadIdentifier, DisplayName: "dn2", InstallDate: time.Now()}, }))