mirror of
https://github.com/fleetdm/fleet
synced 2026-04-21 21:47:20 +00:00
Resolves #33254

This can be reproduced locally by running the following "high load" test:

Run 500 hosts using osquery-perf:

```
go run ./cmd/osquery-perf --enroll_secret ... \
  --host_count 500 \
  --server_url https://localhost:8080 \
  --live_query_fail_prob 0.0 \
  --live_query_no_results_prob 0.0 \
  --orbit_prob 0.0 \
  --http_message_signature_prob 0.0
```

Run `stress_test_live_queries.sh`:

```
#!/bin/bash
while true; do
  curl -v -k -X POST -H "Authorization: Bearer $TEST_TOKEN" https://localhost:8080/api/latest/fleet/queries/$SAVED_QUERY_ID/run -d '{"host_ids": [<500 comma-separated host ids>]}'
done
```

Use "Redis Insight" or the like and you will start to see `livequery:{$CAMPAIGN_ID}` keys with `No limit` (which is the bug):

<img width="1380" height="227" alt="Screenshot 2025-10-07 at 3 10 26 PM" src="https://github.com/user-attachments/assets/30434348-3217-40c4-8ebc-bab5ceb4daa9" />

- [X] Changes file added for user-visible changes in `changes/`, `orbit/changes/` or `ee/fleetd-chrome/changes`.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- Bug Fixes
  - Prevent lingering Redis keys for live queries by ensuring keys are cleaned up and not recreated when completing/canceling non-existent queries.
  - Improves resource usage and avoids stale state in live query processing.
- Tests
  - Added tests verifying proper retrieval/completion behavior and that no Redis key is created for non-existent live queries.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Ian Littman <iansltx@gmail.com>
266 lines
7.7 KiB
Go
266 lines
7.7 KiB
Go
package live_query
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/fleetdm/fleet/v4/server/datastore/redis"
|
|
"github.com/fleetdm/fleet/v4/server/fleet"
|
|
redigo "github.com/gomodule/redigo/redis"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
var testFunctions = [...]func(*testing.T, fleet.LiveQueryStore){
|
|
testLiveQuery,
|
|
testLiveQueryNoTargets,
|
|
testLiveQueryStopQuery,
|
|
testLiveQueryExpiredQuery,
|
|
testLiveQueryOnlyExpired,
|
|
testLiveQueryCleanupInactive,
|
|
testLiveQuerySetBitOnlyIfKeyExists,
|
|
}
|
|
|
|
func testLiveQuery(t *testing.T, store fleet.LiveQueryStore) {
|
|
queries, err := store.QueriesForHost(1)
|
|
assert.NoError(t, err)
|
|
assert.Len(t, queries, 0)
|
|
queries, err = store.QueriesForHost(3)
|
|
assert.NoError(t, err)
|
|
assert.Len(t, queries, 0)
|
|
|
|
assert.NoError(t, store.RunQuery("test", "select 1", []uint{1, 3}))
|
|
assert.NoError(t, store.RunQuery("test2", "select 2", []uint{3}))
|
|
assert.NoError(t, store.RunQuery("test3", "select 3", []uint{1}))
|
|
assert.NoError(t, store.RunQuery("test4", "select 4", []uint{4}))
|
|
|
|
queries, err = store.QueriesForHost(1)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t,
|
|
map[string]string{
|
|
"test": "select 1",
|
|
"test3": "select 3",
|
|
},
|
|
queries,
|
|
)
|
|
queries, err = store.QueriesForHost(2)
|
|
assert.NoError(t, err)
|
|
assert.Len(t, queries, 0)
|
|
queries, err = store.QueriesForHost(3)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t,
|
|
map[string]string{
|
|
"test": "select 1",
|
|
"test2": "select 2",
|
|
},
|
|
queries,
|
|
)
|
|
|
|
assert.NoError(t, store.QueryCompletedByHost("test", 1))
|
|
assert.NoError(t, store.QueryCompletedByHost("test2", 3))
|
|
|
|
queries, err = store.QueriesForHost(1)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t,
|
|
map[string]string{
|
|
"test3": "select 3",
|
|
},
|
|
queries,
|
|
)
|
|
queries, err = store.QueriesForHost(2)
|
|
assert.NoError(t, err)
|
|
assert.Len(t, queries, 0)
|
|
queries, err = store.QueriesForHost(3)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t,
|
|
map[string]string{
|
|
"test": "select 1",
|
|
},
|
|
queries,
|
|
)
|
|
}
|
|
|
|
func testLiveQueryNoTargets(t *testing.T, store fleet.LiveQueryStore) {
|
|
assert.Error(t, store.RunQuery("test", "select 1", []uint{}))
|
|
}
|
|
|
|
func testLiveQueryStopQuery(t *testing.T, store fleet.LiveQueryStore) {
|
|
require.NoError(t, store.RunQuery("test", "select 1", []uint{1, 3}))
|
|
require.NoError(t, store.RunQuery("test2", "select 2", []uint{1, 3}))
|
|
require.NoError(t, store.StopQuery("test"))
|
|
|
|
queries, err := store.QueriesForHost(1)
|
|
require.NoError(t, err)
|
|
assert.Len(t, queries, 1)
|
|
}
|
|
|
|
func testLiveQueryExpiredQuery(t *testing.T, store fleet.LiveQueryStore) {
|
|
oldModulo := cleanupExpiredQueriesModulo
|
|
cleanupExpiredQueriesModulo = 1 // run the cleanup each time
|
|
t.Cleanup(func() { cleanupExpiredQueriesModulo = oldModulo })
|
|
|
|
require.NoError(t, store.RunQuery("test", "select 1", []uint{1}))
|
|
|
|
// simulate a "test2" live query that has expired but is still in the set
|
|
pool := store.(*redisLiveQuery).pool
|
|
conn := redis.ConfigureDoer(pool, pool.Get())
|
|
defer conn.Close()
|
|
_, err := conn.Do("SADD", activeQueriesKey, "test2")
|
|
require.NoError(t, err)
|
|
|
|
queries, err := store.QueriesForHost(1)
|
|
require.NoError(t, err)
|
|
assert.Len(t, queries, 1)
|
|
assert.Equal(t, map[string]string{"test": "select 1"}, queries)
|
|
|
|
assert.Eventually(t, func() bool {
|
|
activeNames, err := redigo.Strings(conn.Do("SMEMBERS", activeQueriesKey))
|
|
require.NoError(t, err)
|
|
if len(activeNames) == 1 && activeNames[0] == "test" {
|
|
return true
|
|
}
|
|
return false
|
|
}, 5*time.Second, 100*time.Millisecond)
|
|
}
|
|
|
|
func testLiveQueryOnlyExpired(t *testing.T, store fleet.LiveQueryStore) {
|
|
oldModulo := cleanupExpiredQueriesModulo
|
|
cleanupExpiredQueriesModulo = 1 // run the cleanup each time
|
|
t.Cleanup(func() { cleanupExpiredQueriesModulo = oldModulo })
|
|
|
|
// simulate a "test" live query that has expired but is still in the set
|
|
pool := store.(*redisLiveQuery).pool
|
|
conn := redis.ConfigureDoer(pool, pool.Get())
|
|
defer conn.Close()
|
|
_, err := conn.Do("SADD", activeQueriesKey, "test")
|
|
require.NoError(t, err)
|
|
|
|
queries, err := store.QueriesForHost(1)
|
|
require.NoError(t, err)
|
|
assert.Len(t, queries, 0)
|
|
|
|
assert.Eventually(t, func() bool {
|
|
activeNames, err := redigo.Strings(conn.Do("SMEMBERS", activeQueriesKey))
|
|
require.NoError(t, err)
|
|
return len(activeNames) == 0
|
|
}, 5*time.Second, 100*time.Millisecond)
|
|
}
|
|
|
|
func testLiveQueryCleanupInactive(t *testing.T, store fleet.LiveQueryStore) {
|
|
ctx := context.Background()
|
|
|
|
// get a raw Redis connection to make direct checks
|
|
pool := store.(*redisLiveQuery).pool
|
|
conn := redis.ConfigureDoer(pool, pool.Get())
|
|
defer conn.Close()
|
|
|
|
// run a few live queries, making them active in Redis
|
|
err := store.RunQuery("1", "SELECT 1", []uint{1, 2, 3})
|
|
require.NoError(t, err)
|
|
err = store.RunQuery("2", "SELECT 2", []uint{4})
|
|
require.NoError(t, err)
|
|
err = store.RunQuery("3", "SELECT 3", []uint{5, 6})
|
|
require.NoError(t, err)
|
|
err = store.RunQuery("4", "SELECT 4", []uint{1, 2, 5})
|
|
require.NoError(t, err)
|
|
err = store.RunQuery("5", "SELECT 5", []uint{2, 3, 7})
|
|
require.NoError(t, err)
|
|
|
|
activeNames, err := store.LoadActiveQueryNames()
|
|
require.NoError(t, err)
|
|
require.ElementsMatch(t, []string{"1", "2", "3", "4", "5"}, activeNames)
|
|
|
|
// sanity-check that the queries are properly stored
|
|
m, err := store.QueriesForHost(1)
|
|
require.NoError(t, err)
|
|
require.Equal(t, map[string]string{"1": "SELECT 1", "4": "SELECT 4"}, m)
|
|
|
|
// simulate that only campaigns 2 and 4 are still active, cleanup the rest
|
|
err = store.CleanupInactiveQueries(ctx, []uint{1, 3, 5})
|
|
require.NoError(t, err)
|
|
|
|
activeNames, err = store.LoadActiveQueryNames()
|
|
require.NoError(t, err)
|
|
require.ElementsMatch(t, []string{"2", "4"}, activeNames)
|
|
|
|
m, err = store.QueriesForHost(1)
|
|
require.NoError(t, err)
|
|
require.Equal(t, map[string]string{"4": "SELECT 4"}, m)
|
|
|
|
// explicitly mark campaign 4 as stopped
|
|
err = store.StopQuery("4")
|
|
require.NoError(t, err)
|
|
|
|
// no more queries for host 1
|
|
m, err = store.QueriesForHost(1)
|
|
require.NoError(t, err)
|
|
require.Empty(t, m)
|
|
|
|
// only campaign 2 remains, for host 4
|
|
m, err = store.QueriesForHost(4)
|
|
require.NoError(t, err)
|
|
require.Equal(t, map[string]string{"2": "SELECT 2"}, m)
|
|
|
|
// simulate that there are no inactive campaigns to cleanup
|
|
err = store.CleanupInactiveQueries(ctx, nil)
|
|
require.NoError(t, err)
|
|
|
|
activeNames, err = store.LoadActiveQueryNames()
|
|
require.NoError(t, err)
|
|
require.ElementsMatch(t, []string{"2"}, activeNames)
|
|
|
|
// simulate that all campaigns are inactive, cleanup all
|
|
err = store.CleanupInactiveQueries(ctx, []uint{1, 2, 3, 4, 5})
|
|
require.NoError(t, err)
|
|
|
|
activeNames, err = store.LoadActiveQueryNames()
|
|
require.NoError(t, err)
|
|
require.Empty(t, activeNames)
|
|
|
|
m, err = store.QueriesForHost(4)
|
|
require.NoError(t, err)
|
|
require.Empty(t, m)
|
|
}
|
|
|
|
func testLiveQuerySetBitOnlyIfKeyExists(t *testing.T, store fleet.LiveQueryStore) {
|
|
// Create a live query campaign.
|
|
err := store.RunQuery("test", "SELECT 1;", []uint{1})
|
|
require.NoError(t, err)
|
|
|
|
// Get the query for the host.
|
|
queries, err := store.QueriesForHost(1)
|
|
require.NoError(t, err)
|
|
require.Equal(t,
|
|
map[string]string{
|
|
"test": "SELECT 1;",
|
|
},
|
|
queries,
|
|
)
|
|
|
|
// Mark query as completed by host.
|
|
err = store.QueryCompletedByHost("test", 1)
|
|
require.NoError(t, err)
|
|
|
|
// Query should not be returned anymore as it was marked as completed for this host.
|
|
queries, err = store.QueriesForHost(1)
|
|
require.NoError(t, err)
|
|
require.Empty(t, queries)
|
|
|
|
// A host could be attempting to write a result for a query that was already deleted.
|
|
err = store.QueryCompletedByHost("test-2", 1)
|
|
require.NoError(t, err)
|
|
|
|
// Let's test that such key was not created.
|
|
|
|
// get a raw Redis connection to make direct checks
|
|
pool := store.(*redisLiveQuery).pool
|
|
conn := redis.ConfigureDoer(pool, pool.Get())
|
|
t.Cleanup(func() {
|
|
conn.Close()
|
|
})
|
|
|
|
n, err := redigo.Int(conn.Do("EXISTS", queryKeyPrefix+"{test-2}"))
|
|
require.NoError(t, err)
|
|
require.Zero(t, n)
|
|
}
|