From 4b21ed571b2b61cc3dbb34677ee9565e43e0dc0a Mon Sep 17 00:00:00 2001 From: Ian Littman Date: Mon, 9 Dec 2024 15:50:28 -0600 Subject: [PATCH] Fix duplicate queries when pulling query stats for a host (#24514) For #23488. We see duplicates for queries that show up in both WHEREs since UNION ALL doesn't deduplicate. Since we're grabbing all of the same columns, GROUP BY'ing all columns on the final result gets us a deduplicated set without having to do any cleanup server-side. # Checklist for submitter If some of the following don't apply, delete the relevant line. - [x] Changes file added for user-visible changes in `changes/`, `orbit/changes/` or `ee/fleetd-chrome/changes`. See [Changes files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/Committing-Changes.md#changes-files) for more information. - [x] Added/updated tests - [x] Manual QA for all new/changed functionality --- changes/23488-host-duplicate-queries | 1 + server/datastore/mysql/hosts.go | 7 +++- server/datastore/mysql/hosts_test.go | 56 ++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 changes/23488-host-duplicate-queries diff --git a/changes/23488-host-duplicate-queries b/changes/23488-host-duplicate-queries new file mode 100644 index 0000000000..7aad235231 --- /dev/null +++ b/changes/23488-host-duplicate-queries @@ -0,0 +1 @@ +* Fix duplicate queries in query stats list in host details diff --git a/server/datastore/mysql/hosts.go b/server/datastore/mysql/hosts.go index 5081acf2f8..a044e96998 100644 --- a/server/datastore/mysql/hosts.go +++ b/server/datastore/mysql/hosts.go @@ -467,7 +467,12 @@ func loadHostScheduledQueryStatsDB(ctx context.Context, db sqlx.QueryerContext, GROUP BY q.id ` - sqlQuery := baseQuery + filter1 + " UNION ALL " + baseQuery + filter2 + finalColumns := `id, name, description, team_id, schedule_interval, discard_data, automations_enabled, + last_fetched, average_memory, denylisted, executions, last_executed, output_size, system_time, + user_time, wall_time` + + sqlQuery := `SELECT ` + finalColumns + ` FROM (` + + baseQuery + filter1 + " UNION ALL " + baseQuery + filter2 + `) qs GROUP BY ` + finalColumns args := []interface{}{ pastDate, diff --git a/server/datastore/mysql/hosts_test.go b/server/datastore/mysql/hosts_test.go index 51e12566a6..1c92299509 100644 --- a/server/datastore/mysql/hosts_test.go +++ b/server/datastore/mysql/hosts_test.go @@ -123,6 +123,7 @@ func TestHosts(t *testing.T) { {"HostsIncludesScheduledQueriesInPackStats", testHostsIncludesScheduledQueriesInPackStats}, {"HostsAllPackStats", testHostsAllPackStats}, {"HostsPackStatsMultipleHosts", testHostsPackStatsMultipleHosts}, + {"HostsPackStatsNoDuplication", testHostsPackStatsNoDuplication}, {"HostsPackStatsForPlatform", testHostsPackStatsForPlatform}, {"HostsReadsLessRows", testHostsReadsLessRows}, {"HostsNoSeenTime", testHostsNoSeenTime}, @@ -4627,6 +4628,61 @@ func testHostsPackStatsMultipleHosts(t *testing.T, ds *Datastore) { } } +// See #22384. +func testHostsPackStatsNoDuplication(t *testing.T, ds *Datastore) { + osqueryHostID, _ := server.GenerateRandomText(10) + host, err := ds.NewHost(context.Background(), &fleet.Host{ + DetailUpdatedAt: time.Now(), + LabelUpdatedAt: time.Now(), + PolicyUpdatedAt: time.Now(), + SeenTime: time.Now(), + NodeKey: ptr.String("1"), + UUID: "1", + Hostname: "foo.local", + PrimaryIP: "192.168.1.1", + PrimaryMac: "30-65-EC-6F-C4-58", + Platform: "darwin", + OsqueryHostID: &osqueryHostID, + }) + require.NoError(t, err) + require.NotNil(t, host) + + query, err := ds.NewQuery(context.Background(), &fleet.Query{ + Name: "global-time", + Query: "select * from time", + Saved: true, + TeamID: nil, + Interval: 30, + AutomationsEnabled: false, + Logging: fleet.LoggingSnapshot, + }) + require.NoError(t, err) + + host, err = ds.Host(context.Background(), host.ID) + require.NoError(t, err) + + // host should see just one stats entry at this point + host, err = ds.Host(context.Background(), host.ID) + require.NoError(t, err) + packStats := host.PackStats + require.Len(t, packStats, 1) + require.Len(t, packStats[0].QueryStats, 1) + + // record query results + require.NoError(t, ds.OverwriteQueryResultRows(context.Background(), []*fleet.ScheduledQueryResultRow{{ + QueryID: query.ID, + HostID: host.ID, + Data: ptr.RawMessage(json.RawMessage(`{"foo": "bar"}`)), + }}, fleet.DefaultMaxQueryReportRows)) + + // host should still see just one stats entry at this point, despite seeing stats from both queries in the UNION + host, err = ds.Host(context.Background(), host.ID) + require.NoError(t, err) + packStats = host.PackStats + require.Len(t, packStats, 1) + require.Len(t, packStats[0].QueryStats, 1) +} + // See #2964. func testHostsPackStatsForPlatform(t *testing.T, ds *Datastore) { osqueryHostID1, _ := server.GenerateRandomText(10)