Remove fk in scheduled_query_stats table and clean up orphan rows for it (#1720)

* Remove fk in scheduled_query_stats table and clean up orphan rows for it

* Improve test and fix bug with the cleanup
This commit is contained in:
Tomas Touceda 2021-08-18 18:30:48 -03:00 committed by GitHub
parent 3725781597
commit 33791dbee8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 134 additions and 0 deletions

View file

@ -0,0 +1 @@
* Remove constraints for scheduled query stats to improve performance.

View file

@ -489,6 +489,10 @@ func cronCleanups(ctx context.Context, ds fleet.Datastore, logger kitlog.Logger,
if err != nil {
level.Error(logger).Log("err", "cleaning carves", "details", err)
}
err = ds.CleanupOrphanScheduledQueryStats()
if err != nil {
level.Error(logger).Log("err", "cleaning scheduled query stats", "details", err)
}
err = trySendStatistics(ds, fleet.StatisticsFrequency, "https://fleetdm.com/api/v1/webhooks/receive-usage-analytics")
if err != nil {

View file

@ -0,0 +1,46 @@
package tables
import (
"database/sql"
"fmt"
"github.com/pkg/errors"
)
func init() {
MigrationClient.AddMigration(Up_20210818151827, Down_20210818151827)
}
func Up_20210818151827(tx *sql.Tx) error {
rows, err := tx.Query(`SELECT DISTINCT CONSTRAINT_NAME, REFERENCED_TABLE_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE WHERE TABLE_NAME = 'scheduled_query_stats' AND CONSTRAINT_NAME <> 'PRIMARY'`)
if err != nil {
return errors.Wrap(err, "getting fk for scheduled_query_stats")
}
var constraints []string
for rows.Next() {
var constraintName string
var referencedTable string
err := rows.Scan(&constraintName, &referencedTable)
if err != nil {
return errors.Wrap(err, "scanning fk for scheduled_query_stats")
}
if referencedTable == "hosts" || referencedTable == "scheduled_queries" {
constraints = append(constraints, constraintName)
}
}
if len(constraints) == 0 {
return errors.New("Found no constraints in scheduled_query_stats")
}
for _, constraint := range constraints {
_, err = tx.Exec(fmt.Sprintf(`ALTER TABLE scheduled_query_stats DROP FOREIGN KEY %s;`, constraint))
if err != nil {
return errors.Wrapf(err, "dropping fk %s", constraint)
}
}
return nil
}
func Down_20210818151827(tx *sql.Tx) error {
return nil
}

View file

@ -166,3 +166,15 @@ func (d *Datastore) ScheduledQuery(id uint) (*fleet.ScheduledQuery, error) {
return sq, nil
}
func (ds *Datastore) CleanupOrphanScheduledQueryStats() error {
_, err := ds.db.Exec(`DELETE FROM scheduled_query_stats where scheduled_query_id not in (select id from scheduled_queries where id=scheduled_query_id)`)
if err != nil {
return errors.Wrap(err, "cleaning orphan scheduled_query_stats by scheduled_query")
}
_, err = ds.db.Exec(`DELETE FROM scheduled_query_stats where host_id not in (select id from hosts where id=host_id)`)
if err != nil {
return errors.Wrap(err, "cleaning orphan scheduled_query_stats by host")
}
return nil
}

View file

@ -2,6 +2,7 @@ package mysql
import (
"testing"
"time"
"github.com/fleetdm/fleet/v4/server/fleet"
"github.com/fleetdm/fleet/v4/server/ptr"
@ -196,3 +197,62 @@ func TestCascadingDeletionOfQueries(t *testing.T) {
require.Nil(t, err)
require.Len(t, gotQueries, 1)
}
func TestCleanupOrphanScheduledQueryStats(t *testing.T) {
ds := CreateMySQLDS(t)
defer ds.Close()
u1 := test.NewUser(t, ds, "Admin", "admin@fleet.co", true)
q1 := test.NewQuery(t, ds, "foo", "select * from time;", u1.ID, true)
p1 := test.NewPack(t, ds, "baz")
h1 := test.NewHost(t, ds, "foo.local", "192.168.1.10", "1", "1", time.Now())
test.NewScheduledQuery(t, ds, p1.ID, q1.ID, 60, false, false, "1")
sq1 := test.NewScheduledQuery(t, ds, p1.ID, q1.ID, 60, false, false, "2")
_, err := ds.db.Exec(`INSERT INTO scheduled_query_stats (
host_id, scheduled_query_id, average_memory, denylisted,
executions, schedule_interval, output_size, system_time,
user_time, wall_time
) VALUES (?, ?, 32, false, 4, 4, 4, 4, 4, 4);`, h1.ID, sq1.ID)
require.NoError(t, err)
// Cleanup doesn't remove stats that are ok
require.NoError(t, ds.CleanupOrphanScheduledQueryStats())
h1, err = ds.Host(h1.ID)
require.NoError(t, err)
require.Len(t, h1.PackStats, 1)
// now we insert a bogus stat
_, err = ds.db.Exec(`INSERT INTO scheduled_query_stats (
host_id, scheduled_query_id, average_memory, denylisted, executions
) VALUES (?, 999, 32, false, 2);`, h1.ID)
require.NoError(t, err)
// and also for an unknown host
_, err = ds.db.Exec(`INSERT INTO scheduled_query_stats (
host_id, scheduled_query_id, average_memory, denylisted, executions
) VALUES (888, 999, 32, true, 4);`)
require.NoError(t, err)
// And we don't see it in the host
h1, err = ds.Host(h1.ID)
require.NoError(t, err)
require.Len(t, h1.PackStats, 1)
// but there are definitely there
var count int
err = ds.db.Get(&count, `SELECT count(*) FROM scheduled_query_stats`)
require.NoError(t, err)
assert.Equal(t, 3, count)
// now we clean it up
require.NoError(t, ds.CleanupOrphanScheduledQueryStats())
err = ds.db.Get(&count, `SELECT count(*) FROM scheduled_query_stats`)
require.NoError(t, err)
assert.Equal(t, 1, count)
h1, err = ds.Host(h1.ID)
require.NoError(t, err)
require.Len(t, h1.PackStats, 1)
}

View file

@ -13,6 +13,7 @@ type ScheduledQueryStore interface {
SaveScheduledQuery(sq *ScheduledQuery) (*ScheduledQuery, error)
DeleteScheduledQuery(id uint) error
ScheduledQuery(id uint) (*ScheduledQuery, error)
CleanupOrphanScheduledQueryStats() error
}
type ScheduledQueryService interface {

View file

@ -16,6 +16,8 @@ type DeleteScheduledQueryFunc func(id uint) error
type ScheduledQueryFunc func(id uint) (*fleet.ScheduledQuery, error)
type CleanupOrphanScheduledQueryStatsFunc func() error
type ScheduledQueryStore struct {
ListScheduledQueriesInPackFunc ListScheduledQueriesInPackFunc
ListScheduledQueriesInPackFuncInvoked bool
@ -31,6 +33,9 @@ type ScheduledQueryStore struct {
ScheduledQueryFunc ScheduledQueryFunc
ScheduledQueryFuncInvoked bool
CleanupOrphanScheduledQueryStatsFunc CleanupOrphanScheduledQueryStatsFunc
CleanupOrphanScheduledQueryStatsFuncInvoked bool
}
func (s *ScheduledQueryStore) ListScheduledQueriesInPack(id uint, opts fleet.ListOptions) ([]*fleet.ScheduledQuery, error) {
@ -57,3 +62,8 @@ func (s *ScheduledQueryStore) ScheduledQuery(id uint) (*fleet.ScheduledQuery, er
s.ScheduledQueryFuncInvoked = true
return s.ScheduledQueryFunc(id)
}
func (s *ScheduledQueryStore) CleanupOrphanScheduledQueryStats() error {
s.CleanupOrphanScheduledQueryStatsFuncInvoked = true
return s.CleanupOrphanScheduledQueryStatsFunc()
}