15378 record empty data results (#15403)

This commit is contained in:
Tim Lee 2023-12-04 08:31:35 -07:00 committed by GitHub
parent 5835cad7e4
commit dc3fc5e6f5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 224 additions and 76 deletions

View file

@ -24,9 +24,7 @@ func (ds *Datastore) OverwriteQueryResultRows(ctx context.Context, rows []*fleet
// Count how many rows are already in the database for the given queryID
var countExisting int
countStmt := `
SELECT COUNT(*) FROM query_results WHERE query_id = ?
`
countStmt := `SELECT COUNT(*) FROM query_results WHERE query_id = ? AND data IS NOT NULL`
err = sqlx.GetContext(ctx, tx, &countExisting, countStmt, queryID)
if err != nil {
return ctxerr.Wrap(ctx, err, "counting existing query results")
@ -95,7 +93,7 @@ func (ds *Datastore) QueryResultRows(ctx context.Context, queryID uint) ([]*flee
h.hostname, h.computer_name, h.hardware_model, h.hardware_serial
FROM query_results qr
LEFT JOIN hosts h ON (qr.host_id=h.id)
WHERE query_id = ?
WHERE query_id = ? AND data IS NOT NULL
`
results := []*fleet.ScheduledQueryResultRow{}
err := sqlx.SelectContext(ctx, ds.reader(ctx), &results, selectStmt, queryID)
@ -108,7 +106,7 @@ func (ds *Datastore) QueryResultRows(ctx context.Context, queryID uint) ([]*flee
func (ds *Datastore) ResultCountForQuery(ctx context.Context, queryID uint) (int, error) {
var count int
err := sqlx.GetContext(ctx, ds.reader(ctx), &count, `SELECT COUNT(*) FROM query_results WHERE query_id = ?`, queryID)
err := sqlx.GetContext(ctx, ds.reader(ctx), &count, `SELECT COUNT(*) FROM query_results WHERE query_id = ? AND data IS NOT NULL`, queryID)
if err != nil {
return 0, ctxerr.Wrap(ctx, err, "counting query results for query")
}
@ -118,7 +116,7 @@ func (ds *Datastore) ResultCountForQuery(ctx context.Context, queryID uint) (int
func (ds *Datastore) ResultCountForQueryAndHost(ctx context.Context, queryID, hostID uint) (int, error) {
var count int
err := sqlx.GetContext(ctx, ds.reader(ctx), &count, `SELECT COUNT(*) FROM query_results WHERE query_id = ? AND host_id = ?`, queryID, hostID)
err := sqlx.GetContext(ctx, ds.reader(ctx), &count, `SELECT COUNT(*) FROM query_results WHERE query_id = ? AND host_id = ? AND data IS NOT NULL`, queryID, hostID)
if err != nil {
return 0, ctxerr.Wrap(ctx, err, "counting query results for query and host")
}

View file

@ -22,8 +22,7 @@ func TestQueryResults(t *testing.T) {
name string
fn func(t *testing.T, ds *Datastore)
}{
{"Save", saveQueryResultRows},
{"Get", getQueryResultRows},
{"Get", testGetQueryResultRows},
{"CountForQuery", testCountResultsForQuery},
{"CountForQueryAndHost", testCountResultsForQueryAndHost},
{"Overwrite", testOverwriteQueryResultRows},
@ -38,13 +37,14 @@ func TestQueryResults(t *testing.T) {
}
}
func saveQueryResultRows(t *testing.T, ds *Datastore) {
func testGetQueryResultRows(t *testing.T, ds *Datastore) {
user := test.NewUser(t, ds, "Test User", "test@example.com", true)
query := test.NewQuery(t, ds, nil, "New Query", "SELECT 1", user.ID, true)
host := test.NewHost(t, ds, "hostname123", "192.168.1.100", "1234", "UI8XB1223", time.Now())
mockTime := time.Now().UTC().Truncate(time.Second)
// Insert 2 Result Rows for Query1 and 1 empty data row
resultRows := []*fleet.ScheduledQueryResultRow{
{
QueryID: query.ID,
@ -62,36 +62,11 @@ func saveQueryResultRows(t *testing.T, ds *Datastore) {
`{"model": "USB Mouse", "vendor": "Logitech"}`,
),
},
}
err := ds.SaveQueryResultRows(context.Background(), resultRows)
require.NoError(t, err)
}
func getQueryResultRows(t *testing.T, ds *Datastore) {
user := test.NewUser(t, ds, "Test User", "test@example.com", true)
query := test.NewQuery(t, ds, nil, "New Query", "SELECT 1", user.ID, true)
host := test.NewHost(t, ds, "hostname123", "192.168.1.100", "1234", "UI8XB1223", time.Now())
mockTime := time.Now().UTC().Truncate(time.Second)
// Insert 2 Result Rows for Query1
resultRows := []*fleet.ScheduledQueryResultRow{
{
QueryID: query.ID,
HostID: host.ID,
LastFetched: mockTime,
Data: json.RawMessage(
`{"model": "USB Keyboard", "vendor": "Apple Inc."}`,
),
},
{
QueryID: query.ID,
HostID: host.ID,
LastFetched: mockTime,
Data: json.RawMessage(
`{"model": "USB Mouse", "vendor": "Logitech"}`,
),
Data: nil,
},
}
@ -162,10 +137,22 @@ func testCountResultsForQuery(t *testing.T, ds *Datastore) {
}`),
},
}
err := ds.SaveQueryResultRows(context.Background(), resultRow)
require.NoError(t, err)
// Insert 1 Result Row with nil Data for Query1
// This should not be counted
resultRowNilData := []*fleet.ScheduledQueryResultRow{
{
QueryID: query1.ID,
HostID: host.ID,
LastFetched: mockTime,
Data: nil,
},
}
err = ds.SaveQueryResultRows(context.Background(), resultRowNilData)
require.NoError(t, err)
// Insert 5 Result Rows for Query2
resultRow2 := []*fleet.ScheduledQueryResultRow{
{
@ -193,7 +180,7 @@ func testCountResultsForQuery(t *testing.T, ds *Datastore) {
require.NoError(t, err)
require.Equal(t, 5, count)
// Returns empty result when no results are found
// Returns 0 when no results are found
count, err = ds.ResultCountForQuery(context.Background(), 999)
require.NoError(t, err)
require.Equal(t, 0, count)
@ -244,6 +231,12 @@ func testCountResultsForQueryAndHost(t *testing.T, ds *Datastore) {
"foo": "bar"
}`),
},
{
QueryID: query2.ID, // This row should not be counted
HostID: host.ID,
LastFetched: mockTime,
Data: nil,
},
}
err := ds.SaveQueryResultRows(context.Background(), resultRows)
@ -325,41 +318,109 @@ func testOverwriteQueryResultRows(t *testing.T, ds *Datastore) {
}
err = ds.OverwriteQueryResultRows(context.Background(), overwriteRows)
require.NoError(t, err)
// Assert that the data has not changed
results, err = ds.QueryResultRowsForHost(context.Background(), overwriteRows[0].QueryID, overwriteRows[0].HostID)
require.NoError(t, err)
require.Len(t, results, 1)
require.Equal(t, overwriteRows[0].QueryID, results[0].QueryID)
require.Equal(t, overwriteRows[0].HostID, results[0].HostID)
require.Equal(t, overwriteRows[0].LastFetched.Unix(), results[0].LastFetched.Unix())
require.JSONEq(t, string(overwriteRows[0].Data), string(results[0].Data))
}
func testQueryResultRowsDoNotExceedMaxRows(t *testing.T, ds *Datastore) {
user := test.NewUser(t, ds, "Test User", "test@example.com", true)
query := test.NewQuery(t, ds, nil, "Overwrite Test Query", "SELECT 1", user.ID, true)
host := test.NewHost(t, ds, "hostname1", "192.168.1.101", "12345", "UI8XB1224", time.Now())
query2 := test.NewQuery(t, ds, nil, "Overwrite Test Query 2", "SELECT 1", user.ID, true)
host1 := test.NewHost(t, ds, "hostname1", "192.168.1.101", "11111", "UI8XB1221", time.Now())
host2 := test.NewHost(t, ds, "hostname2", "192.168.1.101", "22222", "UI8XB1222", time.Now())
host3 := test.NewHost(t, ds, "hostname3", "192.168.1.101", "33333", "UI8XB1223", time.Now())
host4 := test.NewHost(t, ds, "hostname4", "192.168.1.101", "44444", "UI8XB1224", time.Now())
mockTime := time.Now().UTC().Truncate(time.Second)
// Generate more than max rows
rows := fleet.MaxQueryReportRows + 50
largeBatchRows := make([]*fleet.ScheduledQueryResultRow, rows)
for i := 0; i < rows; i++ {
largeBatchRows[i] = &fleet.ScheduledQueryResultRow{
// Generate max rows -1
maxRows := fleet.MaxQueryReportRows - 1
maxMinusOneRows := make([]*fleet.ScheduledQueryResultRow, maxRows)
for i := 0; i < maxRows; i++ {
maxMinusOneRows[i] = &fleet.ScheduledQueryResultRow{
QueryID: query.ID,
HostID: host.ID,
HostID: host1.ID,
LastFetched: mockTime,
Data: json.RawMessage(`{"model": "Bulk Mouse", "vendor": "BulkTech"}`),
}
}
err := ds.OverwriteQueryResultRows(context.Background(), maxMinusOneRows)
require.NoError(t, err)
err := ds.OverwriteQueryResultRows(context.Background(), largeBatchRows)
// Add an empty data rows which do not count towards the max
err = ds.OverwriteQueryResultRows(context.Background(), []*fleet.ScheduledQueryResultRow{
{
QueryID: query.ID,
HostID: host2.ID,
LastFetched: mockTime,
Data: nil,
},
})
require.NoError(t, err)
// Confirm that we can still add a row
err = ds.OverwriteQueryResultRows(context.Background(), []*fleet.ScheduledQueryResultRow{
{
QueryID: query.ID,
HostID: host3.ID,
LastFetched: mockTime,
Data: json.RawMessage(`{"model": "USB Mouse", "vendor": "Logitech"}`),
},
})
require.NoError(t, err)
// Assert that we now have max rows
count, err := ds.ResultCountForQuery(context.Background(), query.ID)
require.NoError(t, err)
require.Equal(t, fleet.MaxQueryReportRows, count)
// Attempt to add another row
err = ds.OverwriteQueryResultRows(context.Background(), []*fleet.ScheduledQueryResultRow{
{
QueryID: query.ID,
HostID: host4.ID,
LastFetched: mockTime,
Data: json.RawMessage(`{"model": "USB Mouse", "vendor": "Logitech"}`),
},
})
require.NoError(t, err)
// Assert that the last row was not added
host4result, err := ds.QueryResultRowsForHost(context.Background(), query.ID, host4.ID)
require.NoError(t, err)
require.Len(t, host4result, 0)
// Generate more than max rows in Query 2
rows := fleet.MaxQueryReportRows + 50
largeBatchRows := make([]*fleet.ScheduledQueryResultRow, rows)
for i := 0; i < rows; i++ {
largeBatchRows[i] = &fleet.ScheduledQueryResultRow{
QueryID: query2.ID,
HostID: host1.ID,
LastFetched: mockTime,
Data: json.RawMessage(`{"model": "Bulk Mouse", "vendor": "BulkTech"}`),
}
}
err = ds.OverwriteQueryResultRows(context.Background(), largeBatchRows)
require.NoError(t, err)
// Confirm only max rows are stored for the queryID
allResults, err := ds.QueryResultRowsForHost(context.Background(), query.ID, host.ID)
allResults, err := ds.QueryResultRowsForHost(context.Background(), query2.ID, host1.ID)
require.NoError(t, err)
require.Len(t, allResults, fleet.MaxQueryReportRows)
// Confirm that new rows are not added when the max is reached
host2 := test.NewHost(t, ds, "hostname2", "192.168.1.102", "678910", "UI8XB1225", time.Now())
newMockTime := mockTime.Add(2 * time.Minute)
overwriteRows := []*fleet.ScheduledQueryResultRow{
{
QueryID: query.ID,
QueryID: query2.ID,
HostID: host2.ID,
LastFetched: newMockTime,
Data: json.RawMessage(
@ -371,7 +432,7 @@ func testQueryResultRowsDoNotExceedMaxRows(t *testing.T, ds *Datastore) {
err = ds.OverwriteQueryResultRows(context.Background(), overwriteRows)
require.NoError(t, err)
host2Results, err := ds.QueryResultRowsForHost(context.Background(), query.ID, host2.ID)
host2Results, err := ds.QueryResultRowsForHost(context.Background(), query2.ID, host2.ID)
require.NoError(t, err)
require.Len(t, host2Results, 0)
}
@ -430,7 +491,7 @@ func (ds *Datastore) SaveQueryResultRows(ctx context.Context, rows []*fleet.Sche
func (ds *Datastore) QueryResultRowsForHost(ctx context.Context, queryID, hostID uint) ([]*fleet.ScheduledQueryResultRow, error) {
selectStmt := `
SELECT query_id, host_id, last_fetched, data FROM query_results
WHERE query_id = ? AND host_id = ?
WHERE query_id = ? AND host_id = ? AND data IS NOT NULL
`
results := []*fleet.ScheduledQueryResultRow{}
err := sqlx.SelectContext(ctx, ds.reader(ctx), &results, selectStmt, queryID, hostID)

View file

@ -1543,11 +1543,6 @@ func (svc *Service) saveResultLogsToQueryReports(ctx context.Context, unmarshale
filtered := getMostRecentResults(unmarshaledResults)
for _, result := range filtered {
// Discard result if there is no snapshot
if len(result.Snapshot) == 0 {
continue
}
dbQuery, ok := queriesDBData[result.QueryName]
if !ok {
// Means the query does not exist with such name anymore. Thus we ignore its result.
@ -1586,6 +1581,18 @@ func (svc *Service) overwriteResultRows(ctx context.Context, result *fleet.Sched
fetchTime := time.Now()
rows := make([]*fleet.ScheduledQueryResultRow, 0, len(result.Snapshot))
// If the snapshot is empty, we still want to save a row with a null value
// to capture LastFetched.
if len(result.Snapshot) == 0 {
rows = append(rows, &fleet.ScheduledQueryResultRow{
QueryID: queryID,
HostID: hostID,
Data: nil,
LastFetched: fetchTime,
})
}
for _, snapshotItem := range result.Snapshot {
row := &fleet.ScheduledQueryResultRow{
QueryID: queryID,

View file

@ -531,7 +531,7 @@ func TestSubmitStatusLogs(t *testing.T) {
assert.Equal(t, status, testLogger.logs)
}
func TestSubmitResultLogs(t *testing.T) {
func TestSubmitResultLogsToLogDestination(t *testing.T) {
ds := new(mock.Store)
svc, ctx := newTestService(t, ds, nil, nil)
@ -691,26 +691,6 @@ func TestSaveResultLogsToQueryReports(t *testing.T) {
},
}
queriesDBData := map[string]*fleet.Query{
"pack/Global/Uptime": {
ID: 1,
DiscardData: false,
Logging: fleet.LoggingSnapshot,
},
}
// Result not saved if result is not a snapshot
notSnapshotResult := []*fleet.ScheduledQueryResult{
{
QueryName: "pack/Global/Uptime",
OsqueryHostID: "1379f59d98f4",
Snapshot: []json.RawMessage{},
UnixTime: 1484078931,
},
}
serv.saveResultLogsToQueryReports(ctx, notSnapshotResult, queriesDBData)
assert.False(t, ds.OverwriteQueryResultRowsFuncInvoked)
// Results not saved if DiscardData is true in Query
discardDataFalse := map[string]*fleet.Query{
"pack/Global/Uptime": {
@ -740,6 +720,108 @@ func TestSaveResultLogsToQueryReports(t *testing.T) {
require.True(t, ds.OverwriteQueryResultRowsFuncInvoked)
}
func TestSubmitResultLogsToQueryResultsWithEmptySnapShot(t *testing.T) {
ds := new(mock.Store)
svc, ctx := newTestService(t, ds, nil, nil)
host := fleet.Host{
ID: 999,
}
ctx = hostctx.NewContext(ctx, &host)
logs := []string{
`{"snapshot":[],"action":"snapshot","name":"pack/Global/query_no_rows","hostIdentifier":"1379f59d98f4","calendarTime":"Tue Jan 10 20:08:51 2017 UTC","unixTime":1484078931,"decorations":{"host_uuid":"EB714C9D-C1F8-A436-B6DA-3F853C5502EA"}}`,
}
logJSON := fmt.Sprintf("[%s]", strings.Join(logs, ","))
var results []json.RawMessage
err := json.Unmarshal([]byte(logJSON), &results)
require.NoError(t, err)
ds.AppConfigFunc = func(ctx context.Context) (*fleet.AppConfig, error) {
return &fleet.AppConfig{
ServerSettings: fleet.ServerSettings{
QueryReportsDisabled: false,
},
}, nil
}
ds.QueryByNameFunc = func(ctx context.Context, teamID *uint, name string) (*fleet.Query, error) {
return &fleet.Query{
ID: 1,
DiscardData: false,
Logging: fleet.LoggingSnapshot,
}, nil
}
ds.ResultCountForQueryFunc = func(ctx context.Context, queryID uint) (int, error) {
return 0, nil
}
ds.OverwriteQueryResultRowsFunc = func(ctx context.Context, rows []*fleet.ScheduledQueryResultRow) error {
require.Len(t, rows, 1)
require.Equal(t, uint(999), rows[0].HostID)
require.NotZero(t, rows[0].LastFetched)
require.Nil(t, rows[0].Data)
return nil
}
err = svc.SubmitResultLogs(ctx, results)
require.NoError(t, err)
assert.True(t, ds.OverwriteQueryResultRowsFuncInvoked)
}
func TestSubmitResultLogsToQueryResultsDoesNotCountNullDataRows(t *testing.T) {
ds := new(mock.Store)
svc, ctx := newTestService(t, ds, nil, nil)
host := fleet.Host{
ID: 999,
}
ctx = hostctx.NewContext(ctx, &host)
logs := []string{
`{"snapshot":[],"action":"snapshot","name":"pack/Global/query_no_rows","hostIdentifier":"1379f59d98f4","calendarTime":"Tue Jan 10 20:08:51 2017 UTC","unixTime":1484078931,"decorations":{"host_uuid":"EB714C9D-C1F8-A436-B6DA-3F853C5502EA"}}`,
}
logJSON := fmt.Sprintf("[%s]", strings.Join(logs, ","))
var results []json.RawMessage
err := json.Unmarshal([]byte(logJSON), &results)
require.NoError(t, err)
ds.AppConfigFunc = func(ctx context.Context) (*fleet.AppConfig, error) {
return &fleet.AppConfig{
ServerSettings: fleet.ServerSettings{
QueryReportsDisabled: false,
},
}, nil
}
ds.QueryByNameFunc = func(ctx context.Context, teamID *uint, name string) (*fleet.Query, error) {
return &fleet.Query{
ID: 1,
DiscardData: false,
Logging: fleet.LoggingSnapshot,
}, nil
}
ds.ResultCountForQueryFunc = func(ctx context.Context, queryID uint) (int, error) {
return 0, nil
}
ds.OverwriteQueryResultRowsFunc = func(ctx context.Context, rows []*fleet.ScheduledQueryResultRow) error {
require.Len(t, rows, 1)
require.Equal(t, uint(999), rows[0].HostID)
require.NotZero(t, rows[0].LastFetched)
require.Nil(t, rows[0].Data)
return nil
}
err = svc.SubmitResultLogs(ctx, results)
require.NoError(t, err)
assert.True(t, ds.OverwriteQueryResultRowsFuncInvoked)
}
func TestGetQueryNameAndTeamIDFromResult(t *testing.T) {
tests := []struct {
input string