From 18fa2f6a02ff9ae1e4e9b1722fee543183cb5d5a Mon Sep 17 00:00:00 2001 From: Tomas Touceda Date: Thu, 8 Jul 2021 13:57:43 -0300 Subject: [PATCH] Issue 1009 calculate diff software (#1305) * First approach to diff * Refactor things for better readability and testing * Remove draft comment for algorithm * Format things a bit better * Remove unused and simplify code a bit * Refactor for readability and testing * Add changes file * Implement new approach based on review comments * Make sure to only delete from the current host * Add single uninstall test and fix code * Improve code based on review --- changes/.keep | 1 + ...-inventory-software-reduce-inserts-deletes | 2 + server/datastore/datastore_software.go | 17 ++ server/datastore/mysql/software.go | 216 +++++++++++++++--- 4 files changed, 199 insertions(+), 37 deletions(-) create mode 100644 changes/.keep create mode 100644 changes/issue-1009-inventory-software-reduce-inserts-deletes diff --git a/changes/.keep b/changes/.keep new file mode 100644 index 0000000000..040d6d585b --- /dev/null +++ b/changes/.keep @@ -0,0 +1 @@ +IGNORE BUT DO NOT REMOVE. THIS IS HERE SO THAT THE DIRECTORY REMAINS \ No newline at end of file diff --git a/changes/issue-1009-inventory-software-reduce-inserts-deletes b/changes/issue-1009-inventory-software-reduce-inserts-deletes new file mode 100644 index 0000000000..4493eab36a --- /dev/null +++ b/changes/issue-1009-inventory-software-reduce-inserts-deletes @@ -0,0 +1,2 @@ +* Reduce the amount of inserts and deletes are done in the database when updating each host's +software inventory. Fixes issue 1009 \ No newline at end of file diff --git a/server/datastore/datastore_software.go b/server/datastore/datastore_software.go index f67bf70975..a121d33291 100644 --- a/server/datastore/datastore_software.go +++ b/server/datastore/datastore_software.go @@ -76,4 +76,21 @@ func testSaveHostSoftware(t *testing.T, ds fleet.Datastore) { require.NoError(t, err) assert.False(t, host2.HostSoftware.Modified) test.ElementsMatchSkipID(t, soft2.Software, host2.HostSoftware.Software) + + soft1 = fleet.HostSoftware{ + Modified: true, + Software: []fleet.Software{ + {Name: "foo", Version: "0.0.3", Source: "chrome_extensions"}, + {Name: "towel", Version: "42.0.0", Source: "apps"}, + }, + } + host1.HostSoftware = soft1 + + err = ds.SaveHostSoftware(host1) + require.NoError(t, err) + + err = ds.LoadHostSoftware(host1) + require.NoError(t, err) + assert.False(t, host1.HostSoftware.Modified) + test.ElementsMatchSkipID(t, soft1.Software, host1.HostSoftware.Software) } diff --git a/server/datastore/mysql/software.go b/server/datastore/mysql/software.go index 983b5e78a3..87bd648e72 100644 --- a/server/datastore/mysql/software.go +++ b/server/datastore/mysql/software.go @@ -22,49 +22,53 @@ func truncateString(str string, length int) string { return str } +func softwareToUniqueString(s fleet.Software) string { + return strings.Join([]string{s.Name, s.Version, s.Source}, "\u0000") +} + +func uniqueStringToSoftware(s string) fleet.Software { + parts := strings.Split(s, "\u0000") + return fleet.Software{ + Name: truncateString(parts[0], maxSoftwareNameLen), + Version: truncateString(parts[1], maxSoftwareVersionLen), + Source: truncateString(parts[2], maxSoftwareSourceLen), + } +} + +func softwareSliceToSet(softwares []fleet.Software) map[string]bool { + result := make(map[string]bool) + for _, s := range softwares { + result[softwareToUniqueString(s)] = true + } + return result +} + +func softwareSliceToIdMap(softwareSlice []fleet.Software) map[string]uint { + result := make(map[string]uint) + for _, s := range softwareSlice { + result[softwareToUniqueString(s)] = s.ID + } + return result +} + func (d *Datastore) SaveHostSoftware(host *fleet.Host) error { if !host.HostSoftware.Modified { return nil } if err := d.withRetryTxx(func(tx *sqlx.Tx) error { - // Clear join table for this host - sql := "DELETE FROM host_software WHERE host_id = ?" - if _, err := tx.Exec(sql, host.ID); err != nil { - return errors.Wrap(err, "clear join table entries") - } - if len(host.HostSoftware.Software) == 0 { + // Clear join table for this host + sql := "DELETE FROM host_software WHERE host_id = ?" + if _, err := tx.Exec(sql, host.ID); err != nil { + return errors.Wrap(err, "clear join table entries") + } + return nil } - // Bulk insert software entries - var args []interface{} - for _, s := range host.HostSoftware.Software { - s.Name = truncateString(s.Name, maxSoftwareNameLen) - s.Version = truncateString(s.Version, maxSoftwareVersionLen) - s.Source = truncateString(s.Source, maxSoftwareSourceLen) - args = append(args, s.Name, s.Version, s.Source) - } - values := strings.TrimSuffix(strings.Repeat("(?,?,?),", len(host.HostSoftware.Software)), ",") - sql = fmt.Sprintf(` - INSERT INTO software (name, version, source) - VALUES %s - ON DUPLICATE KEY UPDATE name = name - `, values) - if _, err := tx.Exec(sql, args...); err != nil { - return errors.Wrap(err, "insert software") - } - - // Bulk update join table - sql = fmt.Sprintf(` - INSERT INTO host_software (host_id, software_id) - SELECT ?, s.id as software_id - FROM software s - WHERE (name, version, source) IN (%s) - `, values) - if _, err := tx.Exec(sql, append([]interface{}{host.ID}, args...)...); err != nil { - return errors.Wrap(err, "insert join table entries") + if err := d.applyChangesForNewSoftware(tx, host); err != nil { + return err } return nil @@ -76,16 +80,154 @@ func (d *Datastore) SaveHostSoftware(host *fleet.Host) error { return nil } -func (d *Datastore) LoadHostSoftware(host *fleet.Host) error { - host.HostSoftware = fleet.HostSoftware{Modified: false} +func nothingChanged(current []fleet.Software, incoming []fleet.Software) bool { + if len(current) != len(incoming) { + return false + } + + currentBitmap := make(map[string]bool) + for _, s := range current { + currentBitmap[softwareToUniqueString(s)] = true + } + for _, s := range incoming { + if _, ok := currentBitmap[softwareToUniqueString(s)]; !ok { + return false + } + } + + return true +} + +func (d *Datastore) applyChangesForNewSoftware(tx *sqlx.Tx, host *fleet.Host) error { + storedCurrentSoftware, err := d.hostSoftwareFromHostID(tx, host.ID) + if err != nil { + return errors.Wrap(err, "loading current software for host") + } + + if nothingChanged(storedCurrentSoftware, host.Software) { + return nil + } + + current := softwareSliceToIdMap(storedCurrentSoftware) + incoming := softwareSliceToSet(host.Software) + + if err = d.deleteUninstalledHostSoftware(tx, host.ID, current, incoming); err != nil { + return err + } + + if err = d.insertNewInstalledHostSoftware(tx, host.ID, current, incoming); err != nil { + return err + } + + return nil +} + +func (d *Datastore) deleteUninstalledHostSoftware( + tx *sqlx.Tx, + hostID uint, + currentIdmap map[string]uint, + incomingBitmap map[string]bool, +) error { + var deletesHostSoftware []interface{} + deletesHostSoftware = append(deletesHostSoftware, hostID) + + for currentKey := range currentIdmap { + if _, ok := incomingBitmap[currentKey]; !ok { + deletesHostSoftware = append(deletesHostSoftware, currentIdmap[currentKey]) + // TODO: delete from software if no host has it + } + } + if len(deletesHostSoftware) <= 1 { + return nil + } + sql := fmt.Sprintf( + `DELETE FROM host_software WHERE host_id = ? AND software_id IN (%s)`, + strings.TrimSuffix(strings.Repeat("?,", len(deletesHostSoftware)-1), ","), + ) + if _, err := tx.Exec(sql, deletesHostSoftware...); err != nil { + return errors.Wrap(err, "delete host software") + } + + return nil +} + +func (d *Datastore) getOrGenerateSoftwareId(tx *sqlx.Tx, s fleet.Software) (uint, error) { + var existingId []int64 + if err := tx.Select( + &existingId, + `SELECT id FROM software WHERE name = ? and version = ? and source = ?`, + s.Name, s.Version, s.Source, + ); err != nil { + return 0, err + } + if len(existingId) > 0 { + return uint(existingId[0]), nil + } + + result, err := tx.Exec( + `INSERT IGNORE INTO software (name, version, source) VALUES (?, ?, ?)`, + s.Name, s.Version, s.Source, + ) + if err != nil { + return 0, errors.Wrap(err, "insert software") + } + id, err := result.LastInsertId() + if err != nil { + return 0, errors.Wrap(err, "last id from software") + } + return uint(id), nil +} + +func (d *Datastore) insertNewInstalledHostSoftware( + tx *sqlx.Tx, + hostID uint, + currentIdmap map[string]uint, + incomingBitmap map[string]bool, +) error { + var insertsHostSoftware []interface{} + for s := range incomingBitmap { + if _, ok := currentIdmap[s]; !ok { + id, err := d.getOrGenerateSoftwareId(tx, uniqueStringToSoftware(s)) + if err != nil { + return err + } + insertsHostSoftware = append(insertsHostSoftware, hostID, id) + } + } + if len(insertsHostSoftware) > 0 { + values := strings.TrimSuffix(strings.Repeat("(?,?),", len(insertsHostSoftware)/2), ",") + sql := fmt.Sprintf(`INSERT INTO host_software (host_id, software_id) VALUES %s`, values) + if _, err := tx.Exec(sql, insertsHostSoftware...); err != nil { + return errors.Wrap(err, "insert host software") + } + } + + return nil +} + +func (d *Datastore) hostSoftwareFromHostID(tx *sqlx.Tx, id uint) ([]fleet.Software, error) { + selectFunc := d.db.Select + if tx != nil { + selectFunc = tx.Select + } sql := ` SELECT * FROM software WHERE id IN (SELECT software_id FROM host_software WHERE host_id = ?) ` - if err := d.db.Select(&host.HostSoftware.Software, sql, host.ID); err != nil { - return errors.Wrap(err, "load host software") + var result []fleet.Software + if err := selectFunc(&result, sql, id); err != nil { + return nil, errors.Wrap(err, "load host software") } + return result, nil +} +func (d *Datastore) LoadHostSoftware(host *fleet.Host) error { + host.HostSoftware = fleet.HostSoftware{Modified: false} + software, err := d.hostSoftwareFromHostID(nil, host.ID) + if err != nil { + return err + } + host.Software = software return nil }