From 3e19cd90a9eadfbcc8b331c3514c8d5e4370170b Mon Sep 17 00:00:00 2001 From: Lucas Manuel Rodriguez Date: Mon, 10 Jun 2024 16:48:05 -0300 Subject: [PATCH] Log warning when hosts enroll with duplicate hardware UUIDs (#19475) #16393 - [X] Changes file added for user-visible changes in `changes/`, `orbit/changes/` or `ee/fleetd-chrome/changes`. See [Changes files](https://fleetdm.com/docs/contributing/committing-changes#changes-files) for more information. - [X] Input data is properly validated, `SELECT *` is avoided, SQL injection is prevented (using placeholders for values in statements) - [X] Added/updated tests - [X] Manual QA for all new/changed functionality --- changes/16393-add-warning-log-duplicate-uuid | 1 + server/datastore/mysql/apple_mdm.go | 4 +- server/datastore/mysql/hosts.go | 105 ++++++++++++++----- server/datastore/mysql/hosts_test.go | 56 ++++++++++ 4 files changed, 140 insertions(+), 26 deletions(-) create mode 100644 changes/16393-add-warning-log-duplicate-uuid diff --git a/changes/16393-add-warning-log-duplicate-uuid b/changes/16393-add-warning-log-duplicate-uuid new file mode 100644 index 0000000000..f1e8001baf --- /dev/null +++ b/changes/16393-add-warning-log-duplicate-uuid @@ -0,0 +1 @@ +* Added warning server log when hosts are enrolling with duplicate hardware identifiers. diff --git a/server/datastore/mysql/apple_mdm.go b/server/datastore/mysql/apple_mdm.go index 17f643bd2a..4140977088 100644 --- a/server/datastore/mysql/apple_mdm.go +++ b/server/datastore/mysql/apple_mdm.go @@ -734,7 +734,7 @@ func ingestMDMAppleDeviceFromCheckinDB( // MDM is necessarily enabled if this gets called, always pass true for that // parameter. - matchID, _, err := matchHostDuringEnrollment(ctx, tx, mdmEnroll, true, "", mdmHost.UUID, mdmHost.HardwareSerial) + enrolledHostInfo, err := matchHostDuringEnrollment(ctx, tx, mdmEnroll, true, "", mdmHost.UUID, mdmHost.HardwareSerial) switch { case errors.Is(err, sql.ErrNoRows): return insertMDMAppleHostDB(ctx, tx, mdmHost, logger, appCfg) @@ -743,7 +743,7 @@ func ingestMDMAppleDeviceFromCheckinDB( return ctxerr.Wrap(ctx, err, "get mdm apple host by serial number or udid") default: - return updateMDMAppleHostDB(ctx, tx, matchID, mdmHost, appCfg) + return updateMDMAppleHostDB(ctx, tx, enrolledHostInfo.ID, mdmHost, appCfg) } } diff --git a/server/datastore/mysql/hosts.go b/server/datastore/mysql/hosts.go index 190d65ba0c..07d80885de 100644 --- a/server/datastore/mysql/hosts.go +++ b/server/datastore/mysql/hosts.go @@ -1697,6 +1697,22 @@ const ( mdmEnroll ) +// enrolledHostInfo contains information of an enrolled host to +// be used when enrolling orbit/osquery, MDM or just re-enrolling hosts +// (e.g. when a osquery.db is deleted from a host). +// +// NOTE: orbit and osquery running as part of fleetd on a device are identified +// with the same entry in the hosts table. +type enrolledHostInfo struct { + // ID is the identifier of the host. + ID uint + // LastEnrolledAt is the time the host last enrolled to Fleet. + LastEnrolledAt time.Time + // NodeKeySet indicates whether `node_key` is set (NOT NULL) for a osquery host + // or if `orbit_node_key` is set (NOT NULL) for a orbit host. + NodeKeySet bool +} + // Attempts to find the matching host ID by osqueryID, host UUID or serial // number. Any of those fields can be left empty if not available, and it will // use the best match in this order: @@ -1711,10 +1727,17 @@ const ( // able to match by serial in this scenario, since this is the only information // we get when enrolling hosts via Apple DEP) AND if the matched host is on the // macOS platform (darwin). -func matchHostDuringEnrollment(ctx context.Context, q sqlx.QueryerContext, enrollType enroll, isMDMEnabled bool, osqueryID, uuid, serial string) (uint, time.Time, error) { +func matchHostDuringEnrollment( + ctx context.Context, + q sqlx.QueryerContext, + enrollType enroll, + isMDMEnabled bool, + osqueryID, uuid, serial string, +) (*enrolledHostInfo, error) { type hostMatch struct { ID uint LastEnrolledAt time.Time `db:"last_enrolled_at"` + NodeKeySet bool `db:"node_key_set"` Priority int } @@ -1724,8 +1747,14 @@ func matchHostDuringEnrollment(ctx context.Context, q sqlx.QueryerContext, enrol rows []hostMatch ) + // For enrollType == mdmEnroll, nodeKeyColumn doesn't matter. + nodeKeyColumn := "node_key" + if enrollType == orbitEnroll { + nodeKeyColumn = "orbit_node_key" + } + if osqueryID != "" || uuid != "" { - _, _ = query.WriteString(`(SELECT id, last_enrolled_at, 1 priority FROM hosts WHERE osquery_host_id = ?)`) + _, _ = query.WriteString(fmt.Sprintf(`(SELECT id, last_enrolled_at, %s IS NOT NULL AS node_key_set, 1 priority FROM hosts WHERE osquery_host_id = ?)`, nodeKeyColumn)) osqueryHostID := osqueryID if osqueryID == "" { // special-case, if there's no osquery identifier, use the uuid @@ -1741,21 +1770,25 @@ func matchHostDuringEnrollment(ctx context.Context, q sqlx.QueryerContext, enrol if query.Len() > 0 { _, _ = query.WriteString(" UNION ") } - _, _ = query.WriteString(`(SELECT id, last_enrolled_at, 2 priority FROM hosts WHERE hardware_serial = ? AND (platform = 'darwin' OR platform = 'ios' OR platform = 'ipados') ORDER BY id LIMIT 1)`) + _, _ = query.WriteString(fmt.Sprintf(`(SELECT id, last_enrolled_at, %s IS NOT NULL AS node_key_set, 2 priority FROM hosts WHERE hardware_serial = ? AND (platform = 'darwin' OR platform = 'ios' OR platform = 'ipados') ORDER BY id LIMIT 1)`, nodeKeyColumn)) args = append(args, serial) } if err := sqlx.SelectContext(ctx, q, &rows, query.String(), args...); err != nil { - return 0, time.Time{}, ctxerr.Wrap(ctx, err, "match host during enrollment") + return nil, ctxerr.Wrap(ctx, err, "match host during enrollment") } if len(rows) == 0 { - return 0, time.Time{}, sql.ErrNoRows + return nil, sql.ErrNoRows } sort.Slice(rows, func(i, j int) bool { l, r := rows[i], rows[j] return l.Priority < r.Priority }) - return rows[0].ID, rows[0].LastEnrolledAt, nil + return &enrolledHostInfo{ + ID: rows[0].ID, + LastEnrolledAt: rows[0].LastEnrolledAt, + NodeKeySet: rows[0].NodeKeySet, + }, nil } func (ds *Datastore) EnrollOrbit(ctx context.Context, isMDMEnabled bool, hostInfo fleet.OrbitHostInfo, orbitNodeKey string, teamID *uint) (*fleet.Host, error) { @@ -1769,7 +1802,7 @@ func (ds *Datastore) EnrollOrbit(ctx context.Context, isMDMEnabled bool, hostInf var host fleet.Host err := ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error { - hostID, _, err := matchHostDuringEnrollment(ctx, tx, orbitEnroll, isMDMEnabled, hostInfo.OsqueryIdentifier, hostInfo.HardwareUUID, hostInfo.HardwareSerial) + enrolledHostInfo, err := matchHostDuringEnrollment(ctx, tx, orbitEnroll, isMDMEnabled, hostInfo.OsqueryIdentifier, hostInfo.HardwareUUID, hostInfo.HardwareSerial) // If the osquery identifier that osqueryd will use was not sent by Orbit, then use the hardware UUID as identifier // (using the hardware UUID is Orbit's default behavior). @@ -1780,6 +1813,17 @@ func (ds *Datastore) EnrollOrbit(ctx context.Context, isMDMEnabled bool, hostInf switch { case err == nil: + if enrolledHostInfo.NodeKeySet { + // This means a orbit host already enrolled at this hosts entry. + // This can happen if two devices have duplicate hardware identifiers or + // if orbit's node key file was deleted from the device (e.g. uninstall+install). + level.Warn(ds.logger).Log( + "msg", "orbit host with duplicate identifier has enrolled in Fleet and will overwrite existing host data", + "identifier", hostInfo.HardwareUUID, + "host_id", enrolledHostInfo.ID, + ) + } + sqlUpdate := ` UPDATE hosts @@ -1788,6 +1832,7 @@ func (ds *Datastore) EnrollOrbit(ctx context.Context, isMDMEnabled bool, hostInf uuid = COALESCE(NULLIF(uuid, ''), ?), osquery_host_id = COALESCE(NULLIF(osquery_host_id, ''), ?), hardware_serial = COALESCE(NULLIF(hardware_serial, ''), ?), + last_enrolled_at = NOW(), team_id = ? WHERE id = ?` _, err := tx.ExecContext(ctx, sqlUpdate, @@ -1796,15 +1841,15 @@ func (ds *Datastore) EnrollOrbit(ctx context.Context, isMDMEnabled bool, hostInf osqueryIdentifier, hostInfo.HardwareSerial, teamID, - hostID, + enrolledHostInfo.ID, ) if err != nil { return ctxerr.Wrap(ctx, err, "orbit enroll error updating host details") } - host.ID = hostID + host.ID = enrolledHostInfo.ID // clear any host_mdm_actions following re-enrollment here - if _, err := tx.ExecContext(ctx, `DELETE FROM host_mdm_actions WHERE host_id = ?`, hostID); err != nil { + if _, err := tx.ExecContext(ctx, `DELETE FROM host_mdm_actions WHERE host_id = ?`, enrolledHostInfo.ID); err != nil { return ctxerr.Wrap(ctx, err, "orbit enroll error clearing host_mdm_actions") } @@ -1871,7 +1916,7 @@ func (ds *Datastore) EnrollOrbit(ctx context.Context, isMDMEnabled bool, hostInf return &host, nil } -// EnrollHost enrolls a host +// EnrollHost enrolls the osquery agent to Fleet. func (ds *Datastore) EnrollHost(ctx context.Context, isMDMEnabled bool, osqueryHostID, hardwareUUID, hardwareSerial, nodeKey string, teamID *uint, cooldown time.Duration) (*fleet.Host, error) { if osqueryHostID == "" { return nil, ctxerr.New(ctx, "missing osquery host identifier") @@ -1881,11 +1926,11 @@ func (ds *Datastore) EnrollHost(ctx context.Context, isMDMEnabled bool, osqueryH err := ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error { zeroTime := time.Unix(0, 0).Add(24 * time.Hour) - matchedID, lastEnrolledAt, err := matchHostDuringEnrollment(ctx, tx, osqueryEnroll, isMDMEnabled, osqueryHostID, hardwareUUID, hardwareSerial) + var hostID uint + enrolledHostInfo, err := matchHostDuringEnrollment(ctx, tx, osqueryEnroll, isMDMEnabled, osqueryHostID, hardwareUUID, hardwareSerial) switch { case err != nil && !errors.Is(err, sql.ErrNoRows): return ctxerr.Wrap(ctx, err, "check existing") - case errors.Is(err, sql.ErrNoRows): // Create new host record. We always create newly enrolled hosts with refetch_requested = true // so that the frontend automatically starts background checks to update the page whenever @@ -1908,30 +1953,42 @@ func (ds *Datastore) EnrollHost(ctx context.Context, isMDMEnabled bool, osqueryH level.Info(ds.logger).Log("hostIDError", err.Error()) return ctxerr.Wrap(ctx, err, "insert host") } - hostID, _ := result.LastInsertId() + lastInsertID, _ := result.LastInsertId() const sqlHostDisplayName = ` INSERT INTO host_display_names (host_id, display_name) VALUES (?, '') ` - _, err = tx.ExecContext(ctx, sqlHostDisplayName, hostID) + _, err = tx.ExecContext(ctx, sqlHostDisplayName, lastInsertID) if err != nil { return ctxerr.Wrap(ctx, err, "insert host_display_names") } - matchedID = uint(hostID) - + hostID = uint(lastInsertID) default: + hostID = enrolledHostInfo.ID + // Prevent hosts from enrolling too often with the same identifier. // Prior to adding this we saw many hosts (probably VMs) with the // same identifier competing for enrollment and causing perf issues. - if cooldown > 0 && time.Since(lastEnrolledAt) < cooldown { + if cooldown > 0 && time.Since(enrolledHostInfo.LastEnrolledAt) < cooldown { return backoff.Permanent(ctxerr.Errorf(ctx, "host identified by %s enrolling too often", osqueryHostID)) } - if err := deleteAllPolicyMemberships(ctx, tx, []uint{matchedID}); err != nil { + if enrolledHostInfo.NodeKeySet { + // This means a osquery host already enrolled at this hosts entry. + // This can happen if two devices have duplicate hardware identifiers or + // if osquery.db was deleted from the device (e.g. uninstall+install). + level.Warn(ds.logger).Log( + "msg", "osquery host with duplicate identifier has enrolled in Fleet and will overwrite existing host data", + "identifier", hardwareUUID, + "host_id", enrolledHostInfo.ID, + ) + } + + if err := deleteAllPolicyMemberships(ctx, tx, []uint{enrolledHostInfo.ID}); err != nil { return ctxerr.Wrap(ctx, err, "cleanup policy membership on re-enroll") } // clear any host_mdm_actions following re-enrollment here - if _, err := tx.ExecContext(ctx, `DELETE FROM host_mdm_actions WHERE host_id = ?`, matchedID); err != nil { + if _, err := tx.ExecContext(ctx, `DELETE FROM host_mdm_actions WHERE host_id = ?`, enrolledHostInfo.ID); err != nil { return ctxerr.Wrap(ctx, err, "error clearing host_mdm_actions") } @@ -1946,7 +2003,7 @@ func (ds *Datastore) EnrollHost(ctx context.Context, isMDMEnabled bool, osqueryH hardware_serial = COALESCE(NULLIF(hardware_serial, ''), ?) WHERE id = ? ` - _, err := tx.ExecContext(ctx, sqlUpdate, nodeKey, teamID, osqueryHostID, hardwareUUID, hardwareSerial, matchedID) + _, err := tx.ExecContext(ctx, sqlUpdate, nodeKey, teamID, osqueryHostID, hardwareUUID, hardwareSerial, enrolledHostInfo.ID) if err != nil { return ctxerr.Wrap(ctx, err, "update host") } @@ -1955,7 +2012,7 @@ func (ds *Datastore) EnrollHost(ctx context.Context, isMDMEnabled bool, osqueryH _, err = tx.ExecContext(ctx, ` INSERT INTO host_seen_times (host_id, seen_time) VALUES (?, ?) ON DUPLICATE KEY UPDATE seen_time = VALUES(seen_time)`, - matchedID, time.Now().UTC()) + hostID, time.Now().UTC()) if err != nil { return ctxerr.Wrap(ctx, err, "new host seen time") } @@ -2012,11 +2069,11 @@ func (ds *Datastore) EnrollHost(ctx context.Context, isMDMEnabled bool, osqueryH WHERE h.id = ? LIMIT 1 ` - err = sqlx.GetContext(ctx, tx, &host, sqlSelect, matchedID) + err = sqlx.GetContext(ctx, tx, &host, sqlSelect, hostID) if err != nil { return ctxerr.Wrap(ctx, err, "getting the host to return") } - _, err = tx.ExecContext(ctx, `INSERT IGNORE INTO label_membership (host_id, label_id) VALUES (?, (SELECT id FROM labels WHERE name = 'All Hosts' AND label_type = 1))`, matchedID) + _, err = tx.ExecContext(ctx, `INSERT IGNORE INTO label_membership (host_id, label_id) VALUES (?, (SELECT id FROM labels WHERE name = 'All Hosts' AND label_type = 1))`, hostID) if err != nil { return ctxerr.Wrap(ctx, err, "insert new host into all hosts label") } diff --git a/server/datastore/mysql/hosts_test.go b/server/datastore/mysql/hosts_test.go index 26f8dc41c8..634b50d61d 100644 --- a/server/datastore/mysql/hosts_test.go +++ b/server/datastore/mysql/hosts_test.go @@ -8069,6 +8069,62 @@ func testHostsEnrollOrbit(t *testing.T, ds *Datastore) { scenarioC(platform) }) } + + // Scenario D: + // - Fleet with MDM disabled. + // - two linux|darwin|windows hosts with the same hardware identifiers (e.g. two cloned VMs). + // - fleetd running with host identifier set to uuid (default). + // - orbit enrolls first, then osquery + // Expected output: The two fleetd instances should be enrolled as one host. + scenarioD := func(platform string) { + dupUUID := uuid.New().String() + dupHWSerial := uuid.New().String() + + h1Orbit, err := ds.EnrollOrbit(ctx, false, fleet.OrbitHostInfo{ + HardwareUUID: dupUUID, + HardwareSerial: dupHWSerial, + Platform: platform, + }, uuid.New().String(), nil) + require.NoError(t, err) + h1OrbitFetched, err := ds.Host(ctx, h1Orbit.ID) + require.NoError(t, err) + time.Sleep(1 * time.Second) // to test the update of last_enrolled_at + h1Osquery, err := ds.EnrollHost(ctx, false, dupUUID, dupUUID, dupHWSerial, uuid.New().String(), nil, 0) + require.NoError(t, err) + h1OsqueryFetched, err := ds.Host(ctx, h1Osquery.ID) + require.NoError(t, err) + require.NotEqual(t, h1OrbitFetched.LastEnrolledAt, h1OsqueryFetched.LastEnrolledAt) + require.Equal(t, h1Orbit.ID, h1Osquery.ID) + time.Sleep(1 * time.Second) // to test the update of last_enrolled_at + h2Orbit, err := ds.EnrollOrbit(ctx, false, fleet.OrbitHostInfo{ + HardwareUUID: dupUUID, + HardwareSerial: dupHWSerial, + Platform: platform, + }, uuid.New().String(), nil) + require.NoError(t, err) + h2OrbitFetched, err := ds.Host(ctx, h2Orbit.ID) + require.NoError(t, err) + require.NotEqual(t, h1OsqueryFetched.LastEnrolledAt, h2OrbitFetched.LastEnrolledAt) + time.Sleep(1 * time.Second) // to test the update of last_enrolled_at + h2Osquery, err := ds.EnrollHost(ctx, false, dupUUID, dupUUID, dupHWSerial, uuid.New().String(), nil, 0) + require.NoError(t, err) + require.Equal(t, h2Orbit.ID, h2Osquery.ID) + h2OsqueryFetched, err := ds.Host(ctx, h2Osquery.ID) + require.NoError(t, err) + require.NotEqual(t, h2OrbitFetched.LastEnrolledAt, h2OsqueryFetched.LastEnrolledAt) + + // the hosts compete for the host entry (all have same row id) + require.Equal(t, h1Orbit.ID, h2Orbit.ID) + require.Equal(t, h1Orbit.ID, h1Osquery.ID) + require.Equal(t, h2Orbit.ID, h2Osquery.ID) + } + for _, platform := range []string{"ubuntu", "windows", "darwin"} { + platform := platform + t.Run("scenarioD_"+platform, func(t *testing.T) { + t.Parallel() + scenarioD(platform) + }) + } } func testHostsEnrollUpdatesMissingInfo(t *testing.T, ds *Datastore) {