From 33791dbee86dbcbbaf5af57791c54028e666f7ed Mon Sep 17 00:00:00 2001 From: Tomas Touceda Date: Wed, 18 Aug 2021 18:30:48 -0300 Subject: [PATCH] Remove fk in scheduled_query_stats table and clean up orphan rows for it (#1720) * Remove fk in scheduled_query_stats table and clean up orphan rows for it * Improve test and fix bug with the cleanup --- changes/improve-performance-of-stats | 1 + cmd/fleet/serve.go | 4 ++ ...0818151827_RemoveForeignKeysSchedQStats.go | 46 ++++++++++++++ server/datastore/mysql/scheduled_queries.go | 12 ++++ .../datastore/mysql/scheduled_queries_test.go | 60 +++++++++++++++++++ server/fleet/scheduled_queries.go | 1 + server/mock/datastore_scheduled_queries.go | 10 ++++ 7 files changed, 134 insertions(+) create mode 100644 changes/improve-performance-of-stats create mode 100644 server/datastore/mysql/migrations/tables/20210818151827_RemoveForeignKeysSchedQStats.go diff --git a/changes/improve-performance-of-stats b/changes/improve-performance-of-stats new file mode 100644 index 0000000000..167dd29ef6 --- /dev/null +++ b/changes/improve-performance-of-stats @@ -0,0 +1 @@ +* Remove constraints for scheduled query stats to improve performance. diff --git a/cmd/fleet/serve.go b/cmd/fleet/serve.go index 64a470366a..3cda7576a6 100644 --- a/cmd/fleet/serve.go +++ b/cmd/fleet/serve.go @@ -489,6 +489,10 @@ func cronCleanups(ctx context.Context, ds fleet.Datastore, logger kitlog.Logger, if err != nil { level.Error(logger).Log("err", "cleaning carves", "details", err) } + err = ds.CleanupOrphanScheduledQueryStats() + if err != nil { + level.Error(logger).Log("err", "cleaning scheduled query stats", "details", err) + } err = trySendStatistics(ds, fleet.StatisticsFrequency, "https://fleetdm.com/api/v1/webhooks/receive-usage-analytics") if err != nil { diff --git a/server/datastore/mysql/migrations/tables/20210818151827_RemoveForeignKeysSchedQStats.go b/server/datastore/mysql/migrations/tables/20210818151827_RemoveForeignKeysSchedQStats.go new file mode 100644 index 0000000000..ec2424f04e --- /dev/null +++ b/server/datastore/mysql/migrations/tables/20210818151827_RemoveForeignKeysSchedQStats.go @@ -0,0 +1,46 @@ +package tables + +import ( + "database/sql" + "fmt" + + "github.com/pkg/errors" +) + +func init() { + MigrationClient.AddMigration(Up_20210818151827, Down_20210818151827) +} + +func Up_20210818151827(tx *sql.Tx) error { + rows, err := tx.Query(`SELECT DISTINCT CONSTRAINT_NAME, REFERENCED_TABLE_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE WHERE TABLE_NAME = 'scheduled_query_stats' AND CONSTRAINT_NAME <> 'PRIMARY'`) + if err != nil { + return errors.Wrap(err, "getting fk for scheduled_query_stats") + } + var constraints []string + for rows.Next() { + var constraintName string + var referencedTable string + err := rows.Scan(&constraintName, &referencedTable) + if err != nil { + return errors.Wrap(err, "scanning fk for scheduled_query_stats") + } + if referencedTable == "hosts" || referencedTable == "scheduled_queries" { + constraints = append(constraints, constraintName) + } + } + if len(constraints) == 0 { + return errors.New("Found no constraints in scheduled_query_stats") + } + + for _, constraint := range constraints { + _, err = tx.Exec(fmt.Sprintf(`ALTER TABLE scheduled_query_stats DROP FOREIGN KEY %s;`, constraint)) + if err != nil { + return errors.Wrapf(err, "dropping fk %s", constraint) + } + } + return nil +} + +func Down_20210818151827(tx *sql.Tx) error { + return nil +} diff --git a/server/datastore/mysql/scheduled_queries.go b/server/datastore/mysql/scheduled_queries.go index 05c5d356ce..b4d2d5386b 100644 --- a/server/datastore/mysql/scheduled_queries.go +++ b/server/datastore/mysql/scheduled_queries.go @@ -166,3 +166,15 @@ func (d *Datastore) ScheduledQuery(id uint) (*fleet.ScheduledQuery, error) { return sq, nil } + +func (ds *Datastore) CleanupOrphanScheduledQueryStats() error { + _, err := ds.db.Exec(`DELETE FROM scheduled_query_stats where scheduled_query_id not in (select id from scheduled_queries where id=scheduled_query_id)`) + if err != nil { + return errors.Wrap(err, "cleaning orphan scheduled_query_stats by scheduled_query") + } + _, err = ds.db.Exec(`DELETE FROM scheduled_query_stats where host_id not in (select id from hosts where id=host_id)`) + if err != nil { + return errors.Wrap(err, "cleaning orphan scheduled_query_stats by host") + } + return nil +} diff --git a/server/datastore/mysql/scheduled_queries_test.go b/server/datastore/mysql/scheduled_queries_test.go index 35f2339071..c31667c98c 100644 --- a/server/datastore/mysql/scheduled_queries_test.go +++ b/server/datastore/mysql/scheduled_queries_test.go @@ -2,6 +2,7 @@ package mysql import ( "testing" + "time" "github.com/fleetdm/fleet/v4/server/fleet" "github.com/fleetdm/fleet/v4/server/ptr" @@ -196,3 +197,62 @@ func TestCascadingDeletionOfQueries(t *testing.T) { require.Nil(t, err) require.Len(t, gotQueries, 1) } + +func TestCleanupOrphanScheduledQueryStats(t *testing.T) { + ds := CreateMySQLDS(t) + defer ds.Close() + + u1 := test.NewUser(t, ds, "Admin", "admin@fleet.co", true) + q1 := test.NewQuery(t, ds, "foo", "select * from time;", u1.ID, true) + p1 := test.NewPack(t, ds, "baz") + h1 := test.NewHost(t, ds, "foo.local", "192.168.1.10", "1", "1", time.Now()) + test.NewScheduledQuery(t, ds, p1.ID, q1.ID, 60, false, false, "1") + sq1 := test.NewScheduledQuery(t, ds, p1.ID, q1.ID, 60, false, false, "2") + + _, err := ds.db.Exec(`INSERT INTO scheduled_query_stats ( + host_id, scheduled_query_id, average_memory, denylisted, + executions, schedule_interval, output_size, system_time, + user_time, wall_time + ) VALUES (?, ?, 32, false, 4, 4, 4, 4, 4, 4);`, h1.ID, sq1.ID) + require.NoError(t, err) + + // Cleanup doesn't remove stats that are ok + require.NoError(t, ds.CleanupOrphanScheduledQueryStats()) + + h1, err = ds.Host(h1.ID) + require.NoError(t, err) + require.Len(t, h1.PackStats, 1) + + // now we insert a bogus stat + _, err = ds.db.Exec(`INSERT INTO scheduled_query_stats ( + host_id, scheduled_query_id, average_memory, denylisted, executions + ) VALUES (?, 999, 32, false, 2);`, h1.ID) + require.NoError(t, err) + // and also for an unknown host + _, err = ds.db.Exec(`INSERT INTO scheduled_query_stats ( + host_id, scheduled_query_id, average_memory, denylisted, executions + ) VALUES (888, 999, 32, true, 4);`) + require.NoError(t, err) + + // And we don't see it in the host + h1, err = ds.Host(h1.ID) + require.NoError(t, err) + require.Len(t, h1.PackStats, 1) + + // but there are definitely there + var count int + err = ds.db.Get(&count, `SELECT count(*) FROM scheduled_query_stats`) + require.NoError(t, err) + assert.Equal(t, 3, count) + + // now we clean it up + require.NoError(t, ds.CleanupOrphanScheduledQueryStats()) + + err = ds.db.Get(&count, `SELECT count(*) FROM scheduled_query_stats`) + require.NoError(t, err) + assert.Equal(t, 1, count) + + h1, err = ds.Host(h1.ID) + require.NoError(t, err) + require.Len(t, h1.PackStats, 1) +} diff --git a/server/fleet/scheduled_queries.go b/server/fleet/scheduled_queries.go index 6ad8af7cd7..33108f4d9f 100644 --- a/server/fleet/scheduled_queries.go +++ b/server/fleet/scheduled_queries.go @@ -13,6 +13,7 @@ type ScheduledQueryStore interface { SaveScheduledQuery(sq *ScheduledQuery) (*ScheduledQuery, error) DeleteScheduledQuery(id uint) error ScheduledQuery(id uint) (*ScheduledQuery, error) + CleanupOrphanScheduledQueryStats() error } type ScheduledQueryService interface { diff --git a/server/mock/datastore_scheduled_queries.go b/server/mock/datastore_scheduled_queries.go index 4f4f4e7f99..2458337269 100644 --- a/server/mock/datastore_scheduled_queries.go +++ b/server/mock/datastore_scheduled_queries.go @@ -16,6 +16,8 @@ type DeleteScheduledQueryFunc func(id uint) error type ScheduledQueryFunc func(id uint) (*fleet.ScheduledQuery, error) +type CleanupOrphanScheduledQueryStatsFunc func() error + type ScheduledQueryStore struct { ListScheduledQueriesInPackFunc ListScheduledQueriesInPackFunc ListScheduledQueriesInPackFuncInvoked bool @@ -31,6 +33,9 @@ type ScheduledQueryStore struct { ScheduledQueryFunc ScheduledQueryFunc ScheduledQueryFuncInvoked bool + + CleanupOrphanScheduledQueryStatsFunc CleanupOrphanScheduledQueryStatsFunc + CleanupOrphanScheduledQueryStatsFuncInvoked bool } func (s *ScheduledQueryStore) ListScheduledQueriesInPack(id uint, opts fleet.ListOptions) ([]*fleet.ScheduledQuery, error) { @@ -57,3 +62,8 @@ func (s *ScheduledQueryStore) ScheduledQuery(id uint) (*fleet.ScheduledQuery, er s.ScheduledQueryFuncInvoked = true return s.ScheduledQueryFunc(id) } + +func (s *ScheduledQueryStore) CleanupOrphanScheduledQueryStats() error { + s.CleanupOrphanScheduledQueryStatsFuncInvoked = true + return s.CleanupOrphanScheduledQueryStatsFunc() +}