fleet/server/vulnerabilities/oval/analyzer.go
Victor Lyuboslavsky 092b51f1c2
Vulnerabilities cron optimization (#39820)
<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #31820 and #39898

Vulnerability processing performance improvements, and added OTEL spans
to the vulnerabilities cron job.
Optimized the two main bottlenecks in the vulnerability cron job: CPE
matching and CVE insertion. In my load testing (10K hosts), the
overall initial vulnerabilities job went from over 2 hours down to 53
minutes, and the number of spans (DB accesses) went from ~2 million to
~90K.

1. CPE matching (TranslateSoftwareToCPE): replaced the goqu query
builder with hand-written SQL using raw database/sql queries. Replaced
UNION with separate queries because case number 3 was an expensive full
text match operation and in most cases we did not need to do that.

2. CVE insertion (TranslateCPEToCVE and other places): replaced
individual INSERT INTO software_cve ... VALUES (?,?,?,?) calls with
batch inserts of 500 rows each, using the existing BatchProcessSimple
helper. Same pattern applied to OS vulnerability inserts using the
existing InsertOSVulnerabilities batch method.

Functional equivalence verified using osquery perf dataset locally. Both
changes produce identical output (22,366 CPEs, 131,233 CVEs) when
compared against the old code using a before/after comparison tool.
- CPE caveats: bugs #39898 and
https://github.com/fleetdm/fleet/issues/39899 found

# Checklist for submitter

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.
See [Changes
files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/guides/committing-changes.md#changes-files)
for more information.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Expanded tracing for automated vulnerability workflows to improve
observability.

* **Performance**
* Bulk/batched processing for software and OS vulnerability inserts to
speed ingestion and downstream tasks.
* More efficient CPE lookup and read-optimized database access for
faster translations.

* **Bug Fixes**
* Improved error recording and read-after-write consistency to reduce
missed or duplicate vulnerability notifications.

* **Tests**
  * Test suite updated to support batch insertion semantics.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2026-02-18 13:59:15 -06:00

184 lines
4.4 KiB
Go

package oval
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
"time"
"github.com/fleetdm/fleet/v4/server/fleet"
oval_parsed "github.com/fleetdm/fleet/v4/server/vulnerabilities/oval/parsed"
utils "github.com/fleetdm/fleet/v4/server/vulnerabilities/utils"
)
const (
// hostsBatchSize is the page size Analyze uses when listing host IDs for an OS version.
hostsBatchSize = 500
// vulnBatchSize is the batch size Analyze passes to utils.BatchProcess when deleting
// software vulnerabilities.
vulnBatchSize = 500
)
// ErrUnsupportedPlatform is returned by Analyze when the platform derived from the
// given OS version is not supported.
var ErrUnsupportedPlatform = errors.New("unsupported platform")
// Analyze scans all hosts running the given OS version for vulnerabilities based on the
// OVAL definitions for their platform, inserting any new vulnerabilities and deleting
// anything patched.
//
// Hosts are processed in pages of hostsBatchSize. For every host, the OVAL definitions are
// evaluated against the host's software (regular and kernel evaluation), and any
// (software, CVE) pair matched by the known OVAL bug rules is discarded. The remaining
// findings are compared against what the datastore already holds for the same source to
// compute what must be inserted and what must be deleted.
//
// It returns ErrUnsupportedPlatform when the platform is not supported. On success it
// returns the result of ds.InsertSoftwareVulnerabilities when collectVulns is true, and
// nil otherwise.
func Analyze(
	ctx context.Context,
	ds fleet.Datastore,
	ver fleet.OSVersion,
	vulnPath string,
	collectVulns bool,
) ([]fleet.SoftwareVulnerability, error) {
	platform := NewPlatform(ver.Platform, ver.Name)
	if !platform.IsSupported() {
		return nil, ErrUnsupportedPlatform
	}

	source := fleet.UbuntuOVALSource
	if platform.IsRedHat() {
		source = fleet.RHELOVALSource
	}

	defs, err := loadDef(platform, vulnPath)
	if err != nil {
		return nil, err
	}

	rules, err := GetKnownOVALBugRules()
	if err != nil {
		return nil, err
	}

	// Since hosts and software have a M:N relationship, the following sets are used to
	// avoid doing duplicated insert/delete operations (a vulnerable software might be
	// present in many hosts).
	toInsertSet := make(map[string]fleet.SoftwareVulnerability)
	toDeleteSet := make(map[string]fleet.SoftwareVulnerability)

	var offset int
	for {
		hostIDs, err := ds.HostIDsByOSVersion(ctx, ver, offset, hostsBatchSize)
		if err != nil {
			return nil, err
		}
		if len(hostIDs) == 0 {
			break
		}
		offset += hostsBatchSize

		foundInBatch := make(map[uint][]fleet.SoftwareVulnerability, len(hostIDs))
		for _, hostID := range hostIDs {
			// Per-iteration copy: the address is handed to the filter below, and before
			// Go 1.22 the range variable is shared across iterations.
			hostID := hostID
			software, err := ds.ListSoftwareForVulnDetection(ctx, fleet.VulnSoftwareFilter{HostID: &hostID})
			if err != nil {
				return nil, err
			}

			found, err := defs.Eval(ver, software)
			if err != nil {
				return nil, err
			}
			kernelFound, err := defs.EvalKernel(software)
			if err != nil {
				return nil, err
			}
			found = append(found, kernelFound...)

			// Index this host's software by ID so each finding can be matched against
			// the known OVAL bug rules together with the software it refers to.
			softwareByID := make(map[uint]fleet.Software, len(software))
			for _, s := range software {
				softwareByID[s.ID] = s
			}

			filtered := make([]fleet.SoftwareVulnerability, 0, len(found))
			for _, v := range found {
				if rules.MatchesAny(softwareByID[v.SoftwareID], v.CVE) {
					continue
				}
				filtered = append(filtered, v)
			}
			foundInBatch[hostID] = filtered
		}

		existingInBatch, err := ds.ListSoftwareVulnerabilitiesByHostIDsSource(ctx, hostIDs, source)
		if err != nil {
			return nil, err
		}

		for _, hostID := range hostIDs {
			toInsert, toDelete := utils.VulnsDelta(foundInBatch[hostID], existingInBatch[hostID])
			for _, v := range toInsert {
				toInsertSet[v.Key()] = v
			}
			for _, v := range toDelete {
				toDeleteSet[v.Key()] = v
			}
		}
	}

	// Deletions are issued first, in batches of vulnBatchSize.
	err = utils.BatchProcess(toDeleteSet, func(v []fleet.SoftwareVulnerability) error {
		return ds.DeleteSoftwareVulnerabilities(ctx, v)
	}, vulnBatchSize)
	if err != nil {
		return nil, err
	}

	allVulns := make([]fleet.SoftwareVulnerability, 0, len(toInsertSet))
	for _, v := range toInsertSet {
		allVulns = append(allVulns, v)
	}

	newVulns, err := ds.InsertSoftwareVulnerabilities(ctx, allVulns, source)
	if err != nil {
		return nil, err
	}

	if !collectVulns {
		return nil, nil
	}
	return newVulns, nil
}
// loadDef locates, reads and parses the latest OVAL definitions file under vulnPath for
// the given platform. The file name looked up is derived from the platform and the
// current time.
//
// An error is returned when the platform is not supported, when the file cannot be
// located or read, when its JSON payload cannot be decoded, or when the platform is
// neither Ubuntu nor Red Hat.
func loadDef(platform Platform, vulnPath string) (oval_parsed.Result, error) {
	if supported := platform.IsSupported(); !supported {
		return nil, fmt.Errorf("platform %q not supported", platform)
	}

	latest, err := utils.LatestFile(platform.ToFilename(time.Now(), "json"), vulnPath)
	if err != nil {
		return nil, err
	}

	payload, err := os.ReadFile(latest)
	if err != nil {
		return nil, err
	}

	// Each supported platform family has its own parsed representation.
	switch {
	case platform.IsUbuntu():
		var parsed oval_parsed.UbuntuResult
		if err := json.Unmarshal(payload, &parsed); err != nil {
			return nil, err
		}
		return parsed, nil

	case platform.IsRedHat():
		var parsed oval_parsed.RhelResult
		if err := json.Unmarshal(payload, &parsed); err != nil {
			return nil, err
		}
		return parsed, nil
	}

	return nil, fmt.Errorf("don't know how to parse file %q for %q platform", latest, platform)
}