fix: properly catch and log APNs errors (#21753)

found reproducing other issues:

1. In the APNs cron, the logger wasn't good enough to print an slice and
the log message was "unsupported type"
2. `APNSDeliveryError` _always_ had `Err` set to nil, while we were
catching those errors, it was impossible to see the cause in the logs
(always printed err=nil)

# Checklist for submitter

If some of the following don't apply, delete the relevant line.

<!-- Note that API documentation changes are now addressed by the
product design team. -->

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.
See [Changes
files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/Committing-Changes.md#changes-files)
for more information.
- [x] Manual QA for all new/changed functionality
This commit is contained in:
Roberto Dip 2024-09-03 11:40:17 -03:00 committed by GitHub
parent 1b06b050d7
commit f6165a220a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 88 additions and 10 deletions

1
changes/apns-errors Normal file
View file

@ -0,0 +1 @@
* Fixed logic to properly catch and log APNs errors.

View file

@ -5,6 +5,8 @@ import (
"encoding/base64"
"fmt"
"net/http"
"sort"
"strings"
"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
"github.com/fleetdm/fleet/v4/server/fleet"
@ -383,14 +385,15 @@ func (svc *MDMAppleCommander) SendNotifications(ctx context.Context, hostUUIDs [
// Even if we didn't get an error, some of the APNs
// responses might have failed, signal that to the caller.
var failed []string
failed := map[string]error{}
for uuid, response := range apnsResponses {
if response.Err != nil {
failed = append(failed, uuid)
failed[uuid] = response.Err
}
}
if len(failed) > 0 {
return &APNSDeliveryError{FailedUUIDs: failed, Err: err}
return &APNSDeliveryError{errorsByUUID: failed}
}
return nil
@ -399,14 +402,38 @@ func (svc *MDMAppleCommander) SendNotifications(ctx context.Context, hostUUIDs [
// APNSDeliveryError records an error and the associated host UUIDs in which it
// occurred.
type APNSDeliveryError struct {
FailedUUIDs []string
Err error
errorsByUUID map[string]error
}
func (e *APNSDeliveryError) Error() string {
return fmt.Sprintf("APNS delivery failed with: %s, for UUIDs: %v", e.Err, e.FailedUUIDs)
var uuids []string
for uuid := range e.errorsByUUID {
uuids = append(uuids, uuid)
}
// sort UUIDs alphabetically for deterministic output
sort.Strings(uuids)
var errStrings []string
for _, uuid := range uuids {
errStrings = append(errStrings, fmt.Sprintf("UUID: %s, Error: %v", uuid, e.errorsByUUID[uuid]))
}
return fmt.Sprintf(
"APNS delivery failed with the following errors:\n%s",
strings.Join(errStrings, "\n"),
)
}
func (e *APNSDeliveryError) Unwrap() error { return e.Err }
func (e *APNSDeliveryError) FailedUUIDs() []string {
var uuids []string
for uuid := range e.errorsByUUID {
uuids = append(uuids, uuid)
}
// sort UUIDs alphabetically for deterministic output
sort.Strings(uuids)
return uuids
}
func (e *APNSDeliveryError) StatusCode() int { return http.StatusBadGateway }

View file

@ -3,7 +3,9 @@ package apple_mdm
import (
"context"
"crypto/tls"
"errors"
"fmt"
"net/http"
"os"
"testing"
@ -200,3 +202,50 @@ func mobileconfigForTest(name, identifier string) []byte {
</plist>
`, name, identifier, uuid.New().String()))
}
func TestAPNSDeliveryError(t *testing.T) {
tests := []struct {
name string
errorsByUUID map[string]error
expectedError string
expectedFailedUUIDs []string
expectedStatusCode int
}{
{
name: "single error",
errorsByUUID: map[string]error{
"uuid1": errors.New("network error"),
},
expectedError: `APNS delivery failed with the following errors:
UUID: uuid1, Error: network error`,
expectedFailedUUIDs: []string{"uuid1"},
expectedStatusCode: http.StatusBadGateway,
},
{
name: "multiple errors, sorted",
errorsByUUID: map[string]error{
"uuid3": errors.New("timeout error"),
"uuid1": errors.New("network error"),
"uuid2": errors.New("certificate error"),
},
expectedError: `APNS delivery failed with the following errors:
UUID: uuid1, Error: network error
UUID: uuid2, Error: certificate error
UUID: uuid3, Error: timeout error`,
expectedFailedUUIDs: []string{"uuid1", "uuid2", "uuid3"},
expectedStatusCode: http.StatusBadGateway,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
apnsErr := &APNSDeliveryError{
errorsByUUID: tt.errorsByUUID,
}
require.Equal(t, tt.expectedError, apnsErr.Error())
require.Equal(t, tt.expectedFailedUUIDs, apnsErr.FailedUUIDs())
require.Equal(t, tt.expectedStatusCode, apnsErr.StatusCode())
})
}
}

View file

@ -3149,7 +3149,7 @@ func SendPushesToPendingDevices(
if err := commander.SendNotifications(ctx, uuids); err != nil {
var apnsErr *apple_mdm.APNSDeliveryError
if errors.As(err, &apnsErr) {
level.Info(logger).Log("msg", "failed to send APNs notification to some hosts", "host_uuids", apnsErr.FailedUUIDs)
level.Info(logger).Log("msg", "failed to send APNs notification to some hosts", "error", apnsErr.Error())
return nil
}

View file

@ -566,13 +566,14 @@ func (svc *Service) enqueueAppleMDMCommand(ctx context.Context, rawXMLCmd []byte
var apnsErr *apple_mdm.APNSDeliveryError
var mysqlErr *mysql.MySQLError
if errors.As(err, &apnsErr) {
if len(apnsErr.FailedUUIDs) < len(deviceIDs) {
failedUUIDs := apnsErr.FailedUUIDs()
if len(failedUUIDs) < len(deviceIDs) {
// some hosts properly received the command, so return success, with the list
// of failed uuids.
return &fleet.CommandEnqueueResult{
CommandUUID: cmd.CommandUUID,
RequestType: cmd.Command.RequestType,
FailedUUIDs: apnsErr.FailedUUIDs,
FailedUUIDs: failedUUIDs,
}, nil
}
// push failed for all hosts