mirror of
https://github.com/fleetdm/fleet
synced 2026-04-21 13:37:30 +00:00
<!-- Add the related story/sub-task/bug number, like Resolves #123, or remove if NA --> **Related issue:** Resolves #42836 This is another hot path optimization. ## Before When a host submits policy results via `SubmitDistributedQueryResults`, the system needed to determine which policies "flipped" (changed from passing to failing or vice versa). Each consumer computed this independently: ``` SubmitDistributedQueryResults(policyResults) | +-- processScriptsForNewlyFailingPolicies | filter to failing policies with scripts | BUILD SUBSET of results | CALL FlippingPoliciesForHost(subset) <-- DB query #1 | convert result to set, filter, queue scripts | +-- processSoftwareForNewlyFailingPolicies | filter to failing policies with installers | BUILD SUBSET of results | CALL FlippingPoliciesForHost(subset) <-- DB query #2 | convert result to set, filter, queue installs | +-- processVPPForNewlyFailingPolicies | filter to failing policies with VPP apps | BUILD SUBSET of results | CALL FlippingPoliciesForHost(subset) <-- DB query #3 | convert result to set, filter, queue VPP | +-- webhook filtering | filter to webhook-enabled policies | CALL FlippingPoliciesForHost(subset) <-- DB query #4 | register flipped policies in Redis | +-- RecordPolicyQueryExecutions CALL FlippingPoliciesForHost(all results) <-- DB query #5 reset attempt counters for newly passing INSERT/UPDATE policy_membership ``` Each `FlippingPoliciesForHost` call runs `SELECT policy_id, passes FROM policy_membership WHERE host_id = ? AND policy_id IN (?)`. All 5 queries hit the same table for the same host before `policy_membership` is updated, so they all see identical state. Each consumer also built intermediate maps to narrow down to its subset before calling `FlippingPoliciesForHost`, then converted the result into yet another set for filtering. This meant 3-4 temporary maps per consumer. ## After ``` SubmitDistributedQueryResults(policyResults) | CALL FlippingPoliciesForHost(all results) <-- single DB query build newFailingSet, normalize newPassing | +-- processScriptsForNewlyFailingPolicies | filter to failing policies with scripts | CHECK newFailingSet (in-memory map lookup) | queue scripts | +-- processSoftwareForNewlyFailingPolicies | filter to failing policies with installers | CHECK newFailingSet (in-memory map lookup) | queue installs | +-- processVPPForNewlyFailingPolicies | filter to failing policies with VPP apps | CHECK newFailingSet (in-memory map lookup) | queue VPP | +-- webhook filtering | filter to webhook-enabled policies | FILTER newFailing/newPassing by policy IDs (in-memory) | register flipped policies in Redis | +-- RecordPolicyQueryExecutions USE pre-computed newPassing (skip DB query) reset attempt counters for newly passing INSERT/UPDATE policy_membership ``` The intermediate subset maps and per-consumer set conversions are removed. Each process function goes directly from "policies with associated automation" to "is this policy in newFailingSet?" in a single map lookup. # Checklist for submitter If some of the following don't apply, delete the relevant line. - [x] Changes file added for user-visible changes in `changes/`, `orbit/changes/` or `ee/fleetd-chrome/changes`. ## Testing - [x] Added/updated automated tests - [x] QA'd all new/changed functionality manually <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Performance Improvements** * Reduced redundant database queries during policy result submissions by computing flipping policies once per host check-in instead of multiple times. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
280 lines
10 KiB
Go
280 lines
10 KiB
Go
package async
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/fleetdm/fleet/v4/server/datastore/mysql"
|
|
"github.com/fleetdm/fleet/v4/server/datastore/redis/redistest"
|
|
"github.com/fleetdm/fleet/v4/server/fleet"
|
|
"github.com/fleetdm/fleet/v4/server/mock"
|
|
"github.com/fleetdm/fleet/v4/server/ptr"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestCollect(t *testing.T) {
|
|
ds := mysql.CreateMySQLDS(t)
|
|
|
|
oldMaxPolicy := maxRedisPolicyResultsPerHost
|
|
maxRedisPolicyResultsPerHost = 3
|
|
t.Cleanup(func() {
|
|
maxRedisPolicyResultsPerHost = oldMaxPolicy
|
|
})
|
|
|
|
t.Run("Label", func(t *testing.T) {
|
|
t.Run("standalone", func(t *testing.T) {
|
|
defer mysql.TruncateTables(t, ds)
|
|
pool := redistest.SetupRedis(t, "label_membership", false, false, false)
|
|
testCollectLabelQueryExecutions(t, ds, pool)
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
defer mysql.TruncateTables(t, ds)
|
|
pool := redistest.SetupRedis(t, "label_membership", true, true, false)
|
|
testCollectLabelQueryExecutions(t, ds, pool)
|
|
})
|
|
})
|
|
|
|
t.Run("Policy", func(t *testing.T) {
|
|
t.Run("standalone", func(t *testing.T) {
|
|
defer mysql.TruncateTables(t, ds)
|
|
pool := redistest.SetupRedis(t, "policy_pass", false, false, false)
|
|
testCollectPolicyQueryExecutions(t, ds, pool)
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
defer mysql.TruncateTables(t, ds)
|
|
pool := redistest.SetupRedis(t, "policy_pass", true, true, false)
|
|
testCollectPolicyQueryExecutions(t, ds, pool)
|
|
})
|
|
})
|
|
|
|
t.Run("Host Last Seen", func(t *testing.T) {
|
|
t.Run("standalone", func(t *testing.T) {
|
|
defer mysql.TruncateTables(t, ds)
|
|
pool := redistest.SetupRedis(t, "host_last_seen", false, false, false)
|
|
testCollectHostsLastSeen(t, ds, pool)
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
defer mysql.TruncateTables(t, ds)
|
|
pool := redistest.SetupRedis(t, "host_last_seen", true, true, false)
|
|
testCollectHostsLastSeen(t, ds, pool)
|
|
})
|
|
})
|
|
|
|
t.Run("Scheduled Query Stats", func(t *testing.T) {
|
|
t.Run("standalone", func(t *testing.T) {
|
|
defer mysql.TruncateTables(t, ds)
|
|
pool := redistest.SetupRedis(t, "scheduled_query_stats", false, false, false)
|
|
testCollectScheduledQueryStats(t, ds, pool)
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
defer mysql.TruncateTables(t, ds)
|
|
pool := redistest.SetupRedis(t, "scheduled_query_stats", true, true, false)
|
|
testCollectScheduledQueryStats(t, ds, pool)
|
|
})
|
|
})
|
|
}
|
|
|
|
func TestRecord(t *testing.T) {
|
|
ds := new(mock.Store)
|
|
ds.RecordLabelQueryExecutionsFunc = func(ctx context.Context, host *fleet.Host, results map[uint]*bool, ts time.Time, deferred bool) error {
|
|
return nil
|
|
}
|
|
ds.AsyncBatchUpdateLabelTimestampFunc = func(ctx context.Context, ids []uint, ts time.Time) error {
|
|
return nil
|
|
}
|
|
ds.RecordPolicyQueryExecutionsFunc = func(ctx context.Context, host *fleet.Host, results map[uint]*bool, ts time.Time, deferred bool, newlyPassingPolicyIDs []uint) error {
|
|
return nil
|
|
}
|
|
ds.AsyncBatchInsertPolicyMembershipFunc = func(ctx context.Context, batch []fleet.PolicyMembershipResult) error {
|
|
return nil
|
|
}
|
|
ds.AsyncBatchUpdatePolicyTimestampFunc = func(ctx context.Context, ids []uint, ts time.Time) error {
|
|
return nil
|
|
}
|
|
ds.SaveHostPackStatsFunc = func(ctx context.Context, teamID *uint, hid uint, stats []fleet.PackStats) error {
|
|
return nil
|
|
}
|
|
ds.AsyncBatchSaveHostsScheduledQueryStatsFunc = func(ctx context.Context, batch map[uint][]fleet.ScheduledQueryStats, batchSize int) (int, error) {
|
|
return 1, nil
|
|
}
|
|
ds.ScheduledQueryIDsByNameFunc = func(ctx context.Context, batchSize int, names ...[2]string) ([]uint, error) {
|
|
return make([]uint, len(names)), nil
|
|
}
|
|
|
|
t.Run("Label", func(t *testing.T) {
|
|
t.Run("standalone", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, "label_membership", false, false, false)
|
|
t.Run("sync", func(t *testing.T) { testRecordLabelQueryExecutionsSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordLabelQueryExecutionsAsync(t, ds, pool) })
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, "label_membership", true, true, false)
|
|
t.Run("sync", func(t *testing.T) { testRecordLabelQueryExecutionsSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordLabelQueryExecutionsAsync(t, ds, pool) })
|
|
})
|
|
})
|
|
|
|
t.Run("Policy", func(t *testing.T) {
|
|
t.Run("standalone", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, "policy_pass", false, false, false)
|
|
t.Run("sync", func(t *testing.T) { testRecordPolicyQueryExecutionsSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordPolicyQueryExecutionsAsync(t, ds, pool) })
|
|
t.Run("sync", func(t *testing.T) { testRecordPolicyQueryExecutionsNoPoliciesSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordPolicyQueryExecutionsNoPoliciesAsync(t, ds, pool) })
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, "policy_pass", true, true, false)
|
|
t.Run("sync", func(t *testing.T) { testRecordPolicyQueryExecutionsSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordPolicyQueryExecutionsAsync(t, ds, pool) })
|
|
t.Run("sync", func(t *testing.T) { testRecordPolicyQueryExecutionsNoPoliciesSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordPolicyQueryExecutionsNoPoliciesAsync(t, ds, pool) })
|
|
})
|
|
})
|
|
|
|
t.Run("Host Last Seen", func(t *testing.T) {
|
|
t.Run("standalone", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, "host_last_seen", false, false, false)
|
|
t.Run("sync", func(t *testing.T) { testRecordHostLastSeenSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordHostLastSeenAsync(t, ds, pool) })
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, "host_last_seen", true, true, false)
|
|
t.Run("sync", func(t *testing.T) { testRecordHostLastSeenSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordHostLastSeenAsync(t, ds, pool) })
|
|
})
|
|
})
|
|
|
|
t.Run("Scheduled Query Stats", func(t *testing.T) {
|
|
t.Run("standalone", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, "scheduled_query_stats", false, false, false)
|
|
t.Run("sync", func(t *testing.T) { testRecordScheduledQueryStatsSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordScheduledQueryStatsAsync(t, ds, pool) })
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, "scheduled_query_stats", true, true, false)
|
|
t.Run("sync", func(t *testing.T) { testRecordScheduledQueryStatsSync(t, ds, pool) })
|
|
t.Run("async", func(t *testing.T) { testRecordScheduledQueryStatsAsync(t, ds, pool) })
|
|
})
|
|
})
|
|
}
|
|
|
|
func TestActiveHostIDsSet(t *testing.T) {
|
|
const zkey = "testActiveHostIDsSet"
|
|
|
|
runTest := func(t *testing.T, pool fleet.RedisPool) {
|
|
activeHosts, err := loadActiveHostIDs(pool, zkey, 10)
|
|
require.NoError(t, err)
|
|
require.Len(t, activeHosts, 0)
|
|
|
|
// add a few hosts with a timestamp that increases by a second for each
|
|
// note that host IDs will be 1..10 (t[0] == host 1, t[1] == host 2, etc.)
|
|
tpurgeNone := time.Now()
|
|
ts := make([]int64, 10)
|
|
for i := range ts {
|
|
if i > 0 {
|
|
ts[i] = time.Unix(ts[i-1], 0).Add(time.Second).Unix()
|
|
} else {
|
|
ts[i] = tpurgeNone.Add(time.Second).Unix()
|
|
}
|
|
|
|
// none ever get deleted, all are after tpurgeNone
|
|
n, err := storePurgeActiveHostID(pool, zkey, uint(i+1), time.Unix(ts[i], 0), tpurgeNone) //nolint:gosec // dismiss G115
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, n)
|
|
}
|
|
|
|
activeHosts, err = loadActiveHostIDs(pool, zkey, 10)
|
|
require.NoError(t, err)
|
|
require.Len(t, activeHosts, len(ts))
|
|
for i, host := range activeHosts {
|
|
require.Equal(t, ts[i], host.LastReported)
|
|
}
|
|
|
|
// store a new one but now use t[1] as purge date - will remove two
|
|
ts2 := ts
|
|
ts2 = append(ts2, time.Unix(ts[len(ts)-1], 0).Add(time.Second).Unix())
|
|
n, err := storePurgeActiveHostID(pool, zkey, uint(len(ts2)), time.Unix(ts2[len(ts2)-1], 0), time.Unix(ts2[1], 0))
|
|
require.NoError(t, err)
|
|
require.Equal(t, 2, n)
|
|
|
|
// report t[3] and t[5] (hosts 4 and 6) as processed
|
|
batch := []hostIDLastReported{
|
|
{HostID: 4, LastReported: ts2[3]},
|
|
{HostID: 6, LastReported: ts2[5]},
|
|
}
|
|
n, err = removeProcessedHostIDs(pool, zkey, batch)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 2, n)
|
|
|
|
// update t[6] of host 7, as if it had reported new data since the load
|
|
newT6 := time.Unix(ts2[len(ts2)-1], 0).Add(time.Second)
|
|
n, err = storePurgeActiveHostID(pool, zkey, 7, newT6, tpurgeNone)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, n)
|
|
|
|
// report t[6] and t[7] (hosts 7 and 8) as processed, but only host 8
|
|
// will get deleted, because the timestamp of host 7 has changed (we pass
|
|
// its old timestamp, to simluate that it changed since loading the
|
|
// information)
|
|
batch = []hostIDLastReported{
|
|
{HostID: 7, LastReported: ts2[6]},
|
|
{HostID: 8, LastReported: ts2[7]},
|
|
}
|
|
n, err = removeProcessedHostIDs(pool, zkey, batch)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 1, n)
|
|
|
|
// check the remaining active hosts (only 6 remain)
|
|
activeHosts, err = loadActiveHostIDs(pool, zkey, 10)
|
|
require.NoError(t, err)
|
|
require.Len(t, activeHosts, 6)
|
|
want := []hostIDLastReported{
|
|
{HostID: 3, LastReported: ts2[2]},
|
|
{HostID: 5, LastReported: ts2[4]},
|
|
{HostID: 7, LastReported: newT6.Unix()},
|
|
{HostID: 9, LastReported: ts2[8]},
|
|
{HostID: 10, LastReported: ts2[9]},
|
|
{HostID: 11, LastReported: ts2[10]},
|
|
}
|
|
require.ElementsMatch(t, want, activeHosts)
|
|
}
|
|
|
|
t.Run("standalone", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, zkey, false, false, false)
|
|
t.Run("sync", func(t *testing.T) { runTest(t, pool) })
|
|
})
|
|
|
|
t.Run("cluster", func(t *testing.T) {
|
|
pool := redistest.SetupRedis(t, zkey, true, true, false)
|
|
t.Run("sync", func(t *testing.T) { runTest(t, pool) })
|
|
})
|
|
}
|
|
|
|
func createHosts(t *testing.T, ds fleet.Datastore, count int, ts time.Time) []uint {
|
|
ids := make([]uint, count)
|
|
for i := 0; i < count; i++ {
|
|
host, err := ds.NewHost(context.Background(), &fleet.Host{
|
|
DetailUpdatedAt: ts,
|
|
LabelUpdatedAt: ts,
|
|
PolicyUpdatedAt: ts,
|
|
SeenTime: ts,
|
|
OsqueryHostID: ptr.String(fmt.Sprintf("%s%d", t.Name(), i)),
|
|
NodeKey: ptr.String(fmt.Sprintf("%s%d", t.Name(), i)),
|
|
UUID: fmt.Sprintf("%s%d", t.Name(), i),
|
|
Hostname: fmt.Sprintf("%sfoo.local%d", t.Name(), i),
|
|
})
|
|
require.NoError(t, err)
|
|
ids[i] = host.ID
|
|
}
|
|
return ids
|
|
}
|