Add more logging to DEP sync codepath (#38649)

This commit is contained in:
Sarah Gillespie 2026-01-23 10:25:24 -06:00 committed by GitHub
parent 1dc3301f2d
commit af2d8a247f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 62 additions and 18 deletions

View file

@ -2054,6 +2054,7 @@ func (ds *Datastore) DeleteHostDEPAssignmentsFromAnotherABM(ctx context.Context,
return ctxerr.Wrap(ctx, err, "building IN statement for selecting host serials")
}
var others []depAssignment
// FIXME: Should we use the writer here? Should we use a transaction?
if err = sqlx.SelectContext(ctx, ds.reader(ctx), &others, selectStmt, selectArgs...); err != nil {
return ctxerr.Wrap(ctx, err, "selecting host serials")
}
@ -2075,7 +2076,7 @@ func (ds *Datastore) DeleteHostDEPAssignments(ctx context.Context, abmTokenID ui
}
selectStmt, selectArgs, err := sqlx.In(`
SELECT h.id, hmdm.enrollment_status
SELECT h.id, h.hardware_serial, hmdm.enrollment_status
FROM hosts h
JOIN host_dep_assignments hdep ON h.id = hdep.host_id
LEFT JOIN host_mdm hmdm ON h.id = hmdm.host_id
@ -2087,6 +2088,7 @@ func (ds *Datastore) DeleteHostDEPAssignments(ctx context.Context, abmTokenID ui
return ds.withTx(ctx, func(tx sqlx.ExtContext) error {
type hostWithEnrollmentStatus struct {
ID uint `db:"id"`
HardwareSerial string `db:"hardware_serial"`
EnrollmentStatus *string `db:"enrollment_status"`
}
var hosts []hostWithEnrollmentStatus
@ -2097,14 +2099,28 @@ func (ds *Datastore) DeleteHostDEPAssignments(ctx context.Context, abmTokenID ui
// Nothing to delete. Hosts may have already been transferred to another ABM.
return nil
}
var hostIDs []uint
var hostIDsPending []uint
hostIDs := make([]uint, 0, len(hosts))
pendingHostIDs := make([]uint, 0)
byStatus := make(map[string][]string)
for _, host := range hosts {
hostIDs = append(hostIDs, host.ID)
if host.EnrollmentStatus != nil && *host.EnrollmentStatus == "Pending" {
hostIDsPending = append(hostIDsPending, host.ID)
if host.EnrollmentStatus == nil {
byStatus["unexpected_nil_status"] = append(byStatus["unexpected_nil_status"], host.HardwareSerial)
} else {
byStatus[*host.EnrollmentStatus] = append(byStatus[*host.EnrollmentStatus], host.HardwareSerial)
if *host.EnrollmentStatus == "Pending" {
pendingHostIDs = append(pendingHostIDs, host.ID)
}
}
}
logs := []any{"msg", "preparing to delete host DEP assignments: select hosts with enrollment status"}
for status, serials := range byStatus {
logs = append(logs, status, fmt.Sprintf("%+v", serials))
}
level.Info(ds.logger).Log(logs...)
level.Info(ds.logger).Log("msg", "deleting host DEP assignments", "host_ids", fmt.Sprintf("%+v", hostIDs))
stmt, args, err := sqlx.In(`
UPDATE host_dep_assignments
@ -2121,11 +2137,12 @@ func (ds *Datastore) DeleteHostDEPAssignments(ctx context.Context, abmTokenID ui
// If pending host is no longer in ABM, we should delete it because it will never enroll in Fleet.
// If the host is later re-added to ABM, it will be re-created.
if len(hostIDsPending) == 0 {
if len(byStatus["Pending"]) == 0 {
return nil
}
level.Info(ds.logger).Log("msg", "deleting pending hosts that are no longer in ABM", "host_ids", fmt.Sprintf("%+v", pendingHostIDs), "serials", fmt.Sprintf("%+v", byStatus["Pending"]))
return deleteHosts(ctx, tx, hostIDsPending)
return deleteHosts(ctx, tx, pendingHostIDs)
})
}
@ -4742,6 +4759,17 @@ WHERE
}
}
var totalProcessed int
for _, serials := range skipSerialsByOrgName {
totalProcessed += len(serials)
}
for _, serials := range serialsByOrgName {
totalProcessed += len(serials)
}
if totalProcessed != len(serials) {
level.Error(ds.logger).Log("msg", fmt.Sprintf("screen dep serials: expected to process %d serials but processed %d", len(serials), totalProcessed))
}
return skipSerialsByOrgName, serialsByOrgName, nil
}

View file

@ -5949,6 +5949,8 @@ WHERE
return nil, err
}
level.Info(ds.logger).Log("msg", "get hosts with previously deleted dep assignments", "serials", strings.Join(matchingSerials, ","))
for _, serial := range matchingSerials {
result[serial] = struct{}{}
}

View file

@ -625,13 +625,15 @@ func (d *DEPService) processDeviceResponse(
if device.MDMMigrationDeadline != nil {
deadline = device.MDMMigrationDeadline.String()
}
level.Debug(d.logger).Log( // Keeping this at Debug level since this could generate a lot of log traffic (one per device)
"msg", "device",
// FIXME: Move this log back to debug level after we've added/improved functionality for accessing DEP status.
level.Info(d.logger).Log(
"msg", "process device response",
"serial_number", device.SerialNumber,
"device_assigned_by", device.DeviceAssignedBy,
"device_assigned_date", device.DeviceAssignedDate,
"op_date", device.OpDate,
"op_type", device.OpType,
"profile_status", device.ProfileStatus,
"profile_assign_time", device.ProfileAssignTime,
"push_push_time", device.ProfilePushTime,
"profile_uuid", device.ProfileUUID,
@ -660,6 +662,9 @@ func (d *DEPService) processDeviceResponse(
// Remove added/modified devices if they have been subsequently deleted
// Remove deleted devices if they have been subsequently added (or re-added)
for _, deletedDevice := range deletedDevices {
// FIXME: Shouldn't the logic for modified devices follow the if/else pattern used for added
// devices? It seems like it should, but it doesn't seem to be making a difference in
// practice. Presumably, we're catching this sommewhere else, but it isn't obvious where.
modifiedDevice, ok := modifiedDevices[deletedDevice.SerialNumber]
if ok && deletedDevice.OpDate.After(modifiedDevice.OpDate) {
delete(modifiedDevices, deletedDevice.SerialNumber)
@ -683,6 +688,7 @@ func (d *DEPService) processDeviceResponse(
needProfileAssign[addedDevice.SerialNumber] = struct{}{}
}
for _, modifiedDevice := range modifiedDevices {
// FIXME: Are we properly determining whether a modified device needs a profile assigned?
modifiedSerials = append(modifiedSerials, modifiedDevice.SerialNumber)
}
for _, deletedDevice := range deletedDevices {
@ -703,10 +709,13 @@ func (d *DEPService) processDeviceResponse(
// if the IT admin changes the MDM server assignment in the ABM UI. In
// these cases, the device is new ("added") to us, but it comes with
// the wrong op_type.
for _, d := range modifiedDevices {
if _, ok := existingSerials[d.SerialNumber]; !ok {
addedDevicesSlice = append(addedDevicesSlice, d)
for _, md := range modifiedDevices {
if _, ok := existingSerials[md.SerialNumber]; !ok {
level.Info(d.logger).Log("msg", "treating device with op_type modified as added device", "serial_number", md.SerialNumber)
addedDevicesSlice = append(addedDevicesSlice, md)
}
// FIXME: addedDevicesSlice is used in part to determine if a profile assignment is needed.
// Should be be checking if the modified device has the right profile UUID and current timestamp?
}
// Check if added devices belong to another ABM server. If so, we must delete them before adding them.
@ -746,8 +755,11 @@ func (d *DEPService) processDeviceResponse(
level.Debug(kitlog.With(d.logger)).Log("msg", "no DEP hosts to add")
}
level.Info(kitlog.With(d.logger)).Log("msg", "devices to assign DEP profiles", "to_add", len(addedDevicesSlice), "to_remove",
strings.Join(deletedSerials, ", "), "to_modify", strings.Join(modifiedSerials, ", "))
level.Info(kitlog.With(d.logger)).Log("msg", "devices to assign DEP profiles",
"to_add", strings.Join(addedSerials, ", "),
"to_remove", strings.Join(deletedSerials, ", "),
"to_modify", strings.Join(modifiedSerials, ", "),
)
// at this point, the hosts rows are created for the devices, with the
// correct team_id, so we know what team-specific profile needs to be applied.
@ -786,18 +798,19 @@ func (d *DEPService) processDeviceResponse(
existingHosts := []fleet.Host{}
existingHostMigrationDeadlines := make(map[uint]time.Time)
for _, existingHost := range existingSerials {
dd, ok := modifiedDevices[existingHost.HardwareSerial]
level.Info(d.logger).Log("msg", "preparing to upsert DEP assignment for existing host", "serial", existingHost.HardwareSerial, "host_id", existingHost.ID)
md, ok := modifiedDevices[existingHost.HardwareSerial]
if !ok {
level.Error(kitlog.With(d.logger)).Log("msg",
"serial coming from ABM is in the database, but it's not in the list of modified devices", "serial",
existingHost.HardwareSerial)
continue
}
if dd.MDMMigrationDeadline != nil {
existingHostMigrationDeadlines[existingHost.ID] = *dd.MDMMigrationDeadline
if md.MDMMigrationDeadline != nil {
existingHostMigrationDeadlines[existingHost.ID] = *md.MDMMigrationDeadline
}
existingHosts = append(existingHosts, *existingHost)
devicesByTeam[existingHost.TeamID] = append(devicesByTeam[existingHost.TeamID], dd)
devicesByTeam[existingHost.TeamID] = append(devicesByTeam[existingHost.TeamID], md)
}
// Upsert the host DEP assignment records now so that the team is properly linked to the ABM
@ -810,6 +823,7 @@ func (d *DEPService) processDeviceResponse(
// assign the profile to each device
for team, devices := range devicesByTeam {
// FIXME: Do we have replication issues or races? There seem to be alot of calls going on inside this function.
profUUID, err := d.getProfileUUIDForTeam(ctx, team, abmOrganizationName)
if err != nil {
return ctxerr.Wrapf(ctx, err, "getting profile for team with id: %v", team)