mirror of
https://github.com/fleetdm/fleet
synced 2026-05-24 09:28:54 +00:00
**Related issue:** Resolves #42741 ## Problem `goval_dictionary.Analyze` opened a `*sql.DB` via `LoadDb` but never closed it. `pkg/download/download.go` atomically renames the goval sqlite on each refresh, unlinking the old inode while the pool still held FDs on it. lsof showed them as `(deleted)`, accumulating over days until Fleet server restart. ## Fix - New `Database.Close()` that delegates to the underlying `*sql.DB`. - `defer func() { _ = db.Close() }()` in `Analyze` right after `LoadDb`. ## How this was tested - New unit test `TestDatabaseCloseReleasesFileHandle` opens a file-backed sqlite, runs a query to force a pool connection, then asserts Close drains the pool and blocks further queries. - `go test ./server/vulnerabilities/goval_dictionary/...` passes. - Standalone Go program reproduced the leak mechanism: `sql.Open` + query + unlink left the FD on the orphaned inode; adding Close released it. ## Confidence and QA ~90% confident. I did not reproduce end-to-end through Fleet's vuln cron locally (the analyzer never entered its query loop; likely `HostIDsByOSVersion` hadn't populated for the Rocky test host). Reviewer: flag anything that drops your confidence. @xpkoala for QA after merge: please exercise in a production-like env with enrolled RHEL hosts and confirm no `(deleted)` FDs after goval refreshes. # Checklist for submitter - [x] Changes file added for user-visible changes in `changes/` (`changes/42741-fix-goval-dictionary-fd-leak`). - [x] Input data is properly validated, `SELECT *` is avoided, SQL injection is prevented (N/A, no new input paths). - [x] Timeouts are implemented and retries are limited to avoid infinite loops (N/A, no new network calls). - [x] If paths of existing endpoints are modified without backwards compatibility, checked the frontend/CLI for any necessary changes (N/A, no endpoint changes). 
## Testing - [x] Added/updated automated tests - [x] Where appropriate, automated tests simulate multiple hosts and test for host isolation (N/A, package-level unit test). - [ ] QA'd all new/changed functionality manually (pending, post-merge by @xpkoala). ## Database migrations - [x] Checked schema for modified tables for auto-updating timestamp columns (N/A, no schema changes). - [x] Confirmed timestamp updates are acceptable (N/A, no schema changes). - [x] Ensured correct collation is explicitly set for character columns (N/A, no schema changes). ## New Fleet configuration settings - [x] Setting(s) is/are explicitly excluded from GitOps (N/A, no new settings). ## fleetd/orbit/Fleet Desktop - [x] Verified compatibility with the latest released version of Fleet (N/A, server-only change). - [x] If the change applies to only one platform, confirmed `runtime.GOOS` is used (N/A). - [x] Verified fleetd runs on macOS, Linux and Windows (N/A, server-only change). - [x] Verified auto-update works (N/A, server-only change). <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Fixed a file-descriptor leak in vulnerability processing so deleted SQLite database files are properly closed without requiring a server restart, improving stability and resource usage. * **Tests** * Added a regression test to ensure database handles are released after close. * **Documentation** * Documented the fix for the file-descriptor leak. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
144 lines
3.9 KiB
Go
144 lines
3.9 KiB
Go
package goval_dictionary
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
|
|
"github.com/fleetdm/fleet/v4/server/fleet"
|
|
"github.com/fleetdm/fleet/v4/server/vulnerabilities/oval"
|
|
"github.com/fleetdm/fleet/v4/server/vulnerabilities/utils"
|
|
)
|
|
|
|
const (
	// hostsBatchSize is the page size used by Analyze when listing the IDs of
	// the hosts that run a given OS version.
	hostsBatchSize = 500
	// vulnBatchSize is the batch size Analyze passes to utils.BatchProcess
	// when deleting software vulnerabilities.
	vulnBatchSize = 500
)

// ErrUnsupportedPlatform is returned by Analyze when the platform of the OS
// version being analyzed is not supported by goval-dictionary.
var ErrUnsupportedPlatform = errors.New("unsupported platform")
|
|
|
|
// Analyze scans all hosts for vulnerabilities based on the sqlite output of goval-dictionary
|
|
// for their platform, inserting any new vulnerabilities and deleting anything patched.
|
|
// Returns nil, nil when the platform isn't supported.
|
|
func Analyze(
|
|
ctx context.Context,
|
|
ds fleet.Datastore,
|
|
ver fleet.OSVersion,
|
|
vulnPath string,
|
|
collectVulns bool,
|
|
logger *slog.Logger,
|
|
) ([]fleet.SoftwareVulnerability, error) {
|
|
platform := oval.NewPlatform(ver.Platform, ver.Name)
|
|
source := fleet.GovalDictionarySource
|
|
if !platform.IsGovalDictionarySupported() {
|
|
return nil, ErrUnsupportedPlatform
|
|
}
|
|
db, err := LoadDb(platform, vulnPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if err := db.Close(); err != nil {
|
|
logger.ErrorContext(ctx, "failed to close goval dictionary database", "platform", platform, "vuln_path", vulnPath, "err", err)
|
|
}
|
|
}()
|
|
|
|
// For kernel-only platforms (e.g., RHEL), we only scan kernel packages via goval-dictionary.
|
|
// Non-kernel packages are scanned via regular OVAL processing. This keeps the testing
|
|
// surface smaller. We can consider expanding scope to all packages in the future if needed.
|
|
kernelsOnly := platform.IsGovalDictionaryKernelOnly()
|
|
|
|
// Since hosts and software have a M:N relationship, the following sets are used to
|
|
// avoid doing duplicated inserts/delete operations (a vulnerable software might be
|
|
// present in many hosts).
|
|
toInsertSet := make(map[string]fleet.SoftwareVulnerability)
|
|
toDeleteSet := make(map[string]fleet.SoftwareVulnerability)
|
|
|
|
var offset int
|
|
for {
|
|
hostIDs, err := ds.HostIDsByOSVersion(ctx, ver, offset, hostsBatchSize)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(hostIDs) == 0 {
|
|
break
|
|
}
|
|
offset += hostsBatchSize
|
|
|
|
foundInBatch := make(map[uint][]fleet.SoftwareVulnerability)
|
|
for _, hostID := range hostIDs {
|
|
hostID := hostID
|
|
software, err := ds.ListSoftwareForVulnDetection(ctx, fleet.VulnSoftwareFilter{
|
|
HostID: &hostID,
|
|
KernelsOnly: kernelsOnly,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
vulnerabilities := db.Eval(ctx, software, logger)
|
|
foundInBatch[hostID] = vulnerabilities
|
|
}
|
|
|
|
existingInBatch, err := ds.ListSoftwareVulnerabilitiesByHostIDsSource(ctx, hostIDs, source)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for _, hostID := range hostIDs {
|
|
inserts, deletes := utils.VulnsDelta(foundInBatch[hostID], existingInBatch[hostID])
|
|
for _, i := range inserts {
|
|
toInsertSet[i.Key()] = i
|
|
}
|
|
for _, d := range deletes {
|
|
toDeleteSet[d.Key()] = d
|
|
}
|
|
}
|
|
}
|
|
|
|
err = utils.BatchProcess(toDeleteSet, func(v []fleet.SoftwareVulnerability) error {
|
|
return ds.DeleteSoftwareVulnerabilities(ctx, v)
|
|
}, vulnBatchSize)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
allVulns := make([]fleet.SoftwareVulnerability, 0, len(toInsertSet))
|
|
for _, v := range toInsertSet {
|
|
allVulns = append(allVulns, v)
|
|
}
|
|
|
|
newVulns, err := ds.InsertSoftwareVulnerabilities(ctx, allVulns, source)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !collectVulns {
|
|
return nil, nil
|
|
}
|
|
|
|
return newVulns, nil
|
|
}
|
|
|
|
// LoadDb returns the latest goval_dictionary database for the given platform.
|
|
func LoadDb(platform oval.Platform, vulnPath string) (*Database, error) {
|
|
if !platform.IsGovalDictionarySupported() {
|
|
return nil, fmt.Errorf("platform %q not supported", platform)
|
|
}
|
|
|
|
fileName := platform.ToGovalDictionaryFilename()
|
|
latest, err := utils.LatestFile(fileName, vulnPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
sqlite, err := sql.Open("sqlite3", latest)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
db := NewDB(sqlite, platform)
|
|
return db, nil
|
|
}
|