fleet/server/vulnerabilities/macoffice/analyzer.go
Victor Lyuboslavsky 092b51f1c2
Vulnerabilities cron optimization (#39820)
<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #31820 and #39898

Vulnerability processing performance improvements, and added OTEL spans
to the vulnerabilities cron job.
Optimized the two main bottlenecks in the vulnerability cron job: CPE
matching and CVE insertion. In my load testing (10K hosts), the
overall initial vulnerabilities job went from over 2 hours down to 53
minutes, and the number of spans (DB accesses) went from ~2 million to
~90K.

1. CPE matching (TranslateSoftwareToCPE): replaced the goqu query
builder with hand-written SQL using raw database/sql queries. Replaced
the UNION with separate queries because case number 3 is an expensive
full-text match operation that is not needed in most cases.

2. CVE insertion (TranslateCPEToCVE and other places): replaced
individual INSERT INTO software_cve ... VALUES (?,?,?,?) calls with
batch inserts of 500 rows each, using the existing BatchProcessSimple
helper. Same pattern applied to OS vulnerability inserts using the
existing InsertOSVulnerabilities batch method.

Functional equivalence verified using osquery perf dataset locally. Both
changes produce identical output (22,366 CPEs, 131,233 CVEs) when
compared against the old code using a before/after comparison tool.
- CPE caveats: bugs #39898 and
https://github.com/fleetdm/fleet/issues/39899 found

# Checklist for submitter

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.
See [Changes
files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/guides/committing-changes.md#changes-files)
for more information.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Expanded tracing for automated vulnerability workflows to improve
observability.

* **Performance**
* Bulk/batched processing for software and OS vulnerability inserts to
speed ingestion and downstream tasks.
* More efficient CPE lookup and read-optimized database access for
faster translations.

* **Bug Fixes**
* Improved error recording and read-after-write consistency to reduce
missed or duplicate vulnerability notifications.

* **Tests**
  * Test suite updated to support batch insertion semantics.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2026-02-18 13:59:15 -06:00

188 lines
4.8 KiB
Go

package macoffice
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
"github.com/fleetdm/fleet/v4/server/fleet"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/io"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/utils"
)
// getLatestReleaseNotes loads the newest Mac Office release notes asset found in 'vulnPath'
// (newest according to the date encoded in the asset's filename) and decodes it from JSON.
//
// It returns (nil, nil) when 'vulnPath' contains no release notes asset. Errors from listing
// the assets, reading the file or decoding the JSON are returned unchanged.
func getLatestReleaseNotes(vulnPath string) (ReleaseNotes, error) {
	client := io.NewFSClient(vulnPath)

	assets, err := client.MacOfficeReleaseNotes()
	if err != nil {
		return nil, err
	}
	if len(assets) == 0 {
		return nil, nil
	}

	// Order the assets newest-first so the one we want ends up at index 0.
	sort.Slice(assets, func(a, b int) bool { return assets[b].Before(assets[a]) })

	raw, err := os.ReadFile(filepath.Join(vulnPath, assets[0].String()))
	if err != nil {
		return nil, err
	}

	notes := ReleaseNotes{}
	if err := json.Unmarshal(raw, &notes); err != nil {
		return nil, err
	}

	// The vulnerability processing algorithm stops at the first release note that is older
	// than the installed software version, so the notes must be ordered by release date,
	// most recent first.
	sort.Slice(notes, func(a, b int) bool { return notes[b].Date.Before(notes[a].Date) })

	return notes, nil
}
// collectVulnerabilities walks 'relNotes' in the order given and gathers, for 'product', the
// CVEs reported by each valid release note, pairing every CVE with the ID of 'software'.
//
// The walk ends at the first valid release note for which CmpVersion(software.Version) is
// <= 0; nothing from that note or any later one is included. A nil slice is returned when no
// vulnerability is collected.
func collectVulnerabilities(
	software *fleet.Software,
	product ProductType,
	relNotes ReleaseNotes,
) []fleet.SoftwareVulnerability {
	var found []fleet.SoftwareVulnerability

	for _, note := range relNotes {
		// Only release notes with a version set and with security updates matter. 'relNotes'
		// should already contain nothing but valid entries, but this check is not expensive.
		if !note.Valid() {
			continue
		}

		// Presumably a result <= 0 means the release note is not newer than the installed
		// version (verify against CmpVersion); the remaining notes are not inspected.
		if note.CmpVersion(software.Version) <= 0 {
			break
		}

		for _, cve := range note.CollectVulnerabilities(product) {
			found = append(found, fleet.SoftwareVulnerability{SoftwareID: software.ID, CVE: cve})
		}
	}

	return found
}
// getStoredVulnerabilities fetches the software identified by 'softwareID' from the datastore
// and returns one fleet.SoftwareVulnerability (software ID + CVE) for each vulnerability
// attached to it. The datastore error, if any, is returned unchanged. The result is nil when
// the software has no vulnerabilities.
func getStoredVulnerabilities(
	ctx context.Context,
	ds fleet.Datastore,
	softwareID uint,
) ([]fleet.SoftwareVulnerability, error) {
	sw, err := ds.SoftwareByID(ctx, softwareID, nil, false, nil)
	if err != nil {
		return nil, err
	}

	var stored []fleet.SoftwareVulnerability
	for idx := range sw.Vulnerabilities {
		stored = append(stored, fleet.SoftwareVulnerability{
			SoftwareID: sw.ID,
			CVE:        sw.Vulnerabilities[idx].CVE,
		})
	}

	return stored, nil
}
// updateVulnsInDB brings the stored vulnerabilities in line with the 'detected' ones.
//
// utils.VulnsDelta splits the difference between 'detected' and 'existing' into entries to
// insert and entries to delete. The deletions are applied first; the insertions are then
// de-duplicated by Key() and written with fleet.MacOfficeReleaseNotesSource as their source.
// The value returned is whatever ds.InsertSoftwareVulnerabilities returns.
func updateVulnsInDB(
	ctx context.Context,
	ds fleet.Datastore,
	detected []fleet.SoftwareVulnerability,
	existing []fleet.SoftwareVulnerability,
) ([]fleet.SoftwareVulnerability, error) {
	toInsert, toDelete := utils.VulnsDelta(detected, existing)

	// Collapse entries that share a key; when keys collide the last entry wins.
	unique := make(map[string]fleet.SoftwareVulnerability, len(toInsert))
	for idx := range toInsert {
		unique[toInsert[idx].Key()] = toInsert[idx]
	}

	if err := ds.DeleteSoftwareVulnerabilities(ctx, toDelete); err != nil {
		return nil, err
	}

	// Flatten the set back into a slice; map iteration makes the order unspecified.
	pending := make([]fleet.SoftwareVulnerability, 0, len(unique))
	for _, vuln := range unique {
		pending = append(pending, vuln)
	}

	return ds.InsertSoftwareVulnerabilities(ctx, pending, fleet.MacOfficeReleaseNotesSource)
}
// Analyze detects vulnerabilities on Mac Office apps using the most recent Mac Office release
// notes asset found in 'vulnPath'.
//
// It iterates over all software whose source is "apps", skips entries whose bundle identifier
// does not map to an Office product, and for each remaining entry reconciles the stored
// vulnerabilities with the ones detected from the release notes. When 'collectVulns' is true
// the vulnerabilities reported as inserted are accumulated and returned; otherwise the returned
// slice is nil. If no release notes are available, (nil, nil) is returned without touching the
// datastore.
func Analyze(
	ctx context.Context,
	ds fleet.Datastore,
	vulnPath string,
	collectVulns bool,
) ([]fleet.SoftwareVulnerability, error) {
	notes, err := getLatestReleaseNotes(vulnPath)
	if err != nil {
		return nil, err
	}
	if len(notes) == 0 {
		return nil, nil
	}

	it, err := ds.AllSoftwareIterator(
		ctx,
		fleet.SoftwareIterQueryOptions{IncludedSources: []string{"apps"}},
	)
	if err != nil {
		return nil, err
	}
	defer it.Close()

	var collected []fleet.SoftwareVulnerability
	for it.Next() {
		sw, err := it.Value()
		if err != nil {
			return nil, ctxerr.Wrap(ctx, err, "getting software from iterator")
		}

		// Anything that is not an Office product is of no interest here.
		product, isOffice := OfficeProductFromBundleId(sw.BundleIdentifier)
		if !isOffice {
			continue
		}

		found := collectVulnerabilities(sw, product, notes)

		// The software instance handed out by the iterator does not include its
		// vulnerabilities, so they are looked up separately.
		stored, err := getStoredVulnerabilities(ctx, ds, sw.ID)
		if err != nil {
			return nil, err
		}

		added, err := updateVulnsInDB(ctx, ds, found, stored)
		if err != nil {
			return nil, err
		}

		if !collectVulns {
			continue
		}
		collected = append(collected, added...)
	}

	if err := it.Err(); err != nil {
		return nil, fmt.Errorf("iter: %w", err)
	}

	return collected, nil
}